├── Eagle ├── lmms_eval │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── instance.py │ │ └── filter.py │ ├── tasks │ │ ├── _task_utils │ │ │ ├── gpt_eval_utils.py │ │ │ └── file_utils.py │ │ ├── ok_vqa │ │ │ ├── _ok_vqa.yaml │ │ │ ├── ok_vqa_val2014.yaml │ │ │ ├── _default_template_vqa_yaml │ │ │ ├── _generate_config.py │ │ │ └── utils.py │ │ ├── flickr30k │ │ │ ├── flickr30k.yaml │ │ │ └── flickr30k_test.yaml │ │ ├── mmmu │ │ │ ├── mmmu.yaml │ │ │ ├── mmmu_val.yaml │ │ │ └── mmmu_test.yaml │ │ ├── docvqa │ │ │ ├── docvqa.yaml │ │ │ ├── docvqa_val.yaml │ │ │ ├── docvqa_test.yaml │ │ │ ├── _default_template_docvqa_yaml │ │ │ └── utils.py │ │ ├── vqav2 │ │ │ ├── _vqav2.yaml │ │ │ ├── vqav2_test.yaml │ │ │ ├── vqav2_val.yaml │ │ │ └── _default_template_vqav2_yaml │ │ ├── cmmmu │ │ │ ├── _cmmmu.yaml │ │ │ ├── _default_template_cmmmu_yaml │ │ │ ├── cmmmu_test.yaml │ │ │ └── cmmmu_val.yaml │ │ ├── iconqa │ │ │ ├── iconqa.yaml │ │ │ ├── iconqa_test.yaml │ │ │ ├── iconqa_val.yaml │ │ │ ├── _default_template_docvqa_yaml │ │ │ └── utils.py │ │ ├── nocaps │ │ │ ├── nocaps.yaml │ │ │ ├── _default_template_nocaps_yaml │ │ │ ├── nocaps_test.yaml │ │ │ └── nocaps_val.yaml │ │ ├── infovqa │ │ │ ├── infovqa.yaml │ │ │ ├── infovqa_val.yaml │ │ │ ├── infovqa_test.yaml │ │ │ ├── _default_template_infovqa_yaml │ │ │ └── utils.py │ │ ├── textvqa │ │ │ ├── _textvqa.yaml │ │ │ ├── textvqa_test.yaml │ │ │ ├── textvqa_val.yaml │ │ │ ├── _default_template_textvqa_yaml │ │ │ └── utils.py │ │ ├── textcaps │ │ │ ├── textcaps.yaml │ │ │ ├── _default_template_textcaps_yaml │ │ │ ├── textcaps_test.yaml │ │ │ ├── textcaps_val.yaml │ │ │ └── textcaps_train.yaml │ │ ├── scienceqa │ │ │ ├── scienceqa_full.yaml │ │ │ ├── scienceqa.yaml │ │ │ ├── scienceqa_img.yaml │ │ │ └── utils.py │ │ ├── vizwiz_vqa │ │ │ ├── _vizwiz_vqa.yaml │ │ │ ├── vizwiz_vqa_val.yaml │ │ │ ├── vizwiz_vqa_test.yaml │ │ │ ├── _default_template_vqa_yaml │ │ │ ├── _generate_config.py │ │ │ └── utils.py │ │ ├── multidocvqa │ │ │ ├── multidocvqa.yaml │ │ │ ├── multidocvqa_test.yaml │ │ │ └── multidocvqa_val.yaml │ │ ├── coco_cap │ │ │ ├── coco2014_cap.yaml │ │ │ ├── coco2017_cap.yaml │ │ │ ├── coco_cap.yaml │ │ │ ├── coco2017_cap_test.yaml │ │ │ ├── coco2014_cap_test.yaml │ │ │ ├── coco2017_cap_val.yaml │ │ │ └── coco2014_cap_val.yaml │ │ ├── refcoco+ │ │ │ ├── refcoco+_seg_val.yaml │ │ │ ├── refcoco+_bbox_val.yaml │ │ │ ├── refcoco+_bbox_testA.yaml │ │ │ ├── refcoco+_bbox_testB.yaml │ │ │ ├── refcoco+_seg_testA.yaml │ │ │ ├── refcoco+_seg_testB.yaml │ │ │ ├── _refcoco.yaml │ │ │ ├── _generate_config.py │ │ │ ├── _default_template_seg_yaml │ │ │ └── _default_template_bbox_yaml │ │ ├── refcoco │ │ │ ├── refcoco_bbox_val.yaml │ │ │ ├── refcoco_seg_test.yaml │ │ │ ├── refcoco_seg_val.yaml │ │ │ ├── refcoco_bbox_test.yaml │ │ │ ├── refcoco_seg_testA.yaml │ │ │ ├── refcoco_seg_testB.yaml │ │ │ ├── refcoco_bbox_testA.yaml │ │ │ ├── refcoco_bbox_testB.yaml │ │ │ ├── _refcoco.yaml │ │ │ ├── _generate_config.py │ │ │ ├── _default_template_seg_yaml │ │ │ └── _default_template_bbox_yaml │ │ ├── refcocog │ │ │ ├── refcocog_seg_val.yaml │ │ │ ├── refcocog_bbox_val.yaml │ │ │ ├── refcocog_seg_test.yaml │ │ │ ├── _refcoco.yaml │ │ │ ├── refcocog_bbox_test.yaml │ │ │ ├── _generate_config.py │ │ │ ├── _default_template_bbox_yaml │ │ │ └── _default_template_seg_yaml │ │ ├── olympiadbench │ │ │ ├── olympiadbench.yaml │ │ │ ├── olympiadbench_test_cn.yaml │ │ │ ├── olympiadbench_test_en.yaml │ │ │ └── cn_utils.py │ │ ├── mmbench │ │ │ ├── mmbench_cn.yaml │ │ │ ├── 
mmbench_en.yaml │ │ │ ├── mmbench_cn_test.yaml │ │ │ ├── mmbench_en_test.yaml │ │ │ ├── mmbench.yaml │ │ │ ├── mmbench_cn_dev.yaml │ │ │ ├── mmbench_en_dev.yaml │ │ │ ├── _default_template_mmbench_cn_yaml │ │ │ ├── _default_template_mmbench_en_yaml │ │ │ └── mmbench_cc.yaml │ │ ├── mathvista │ │ │ ├── mathvista.yaml │ │ │ ├── mathvista_test.yaml │ │ │ └── mathvista_testmini.yaml │ │ ├── seedbench │ │ │ ├── seedbench_ppl.yaml │ │ │ ├── seedbench.yaml │ │ │ └── utils.py │ │ ├── stvqa │ │ │ ├── stvqa.yaml │ │ │ └── utils.py │ │ ├── ocrbench │ │ │ └── ocrbench.yaml │ │ ├── gqa │ │ │ ├── gqa.yaml │ │ │ └── utils.py │ │ ├── chartqa │ │ │ ├── chartqa.yaml │ │ │ └── utils.py │ │ ├── ai2d │ │ │ ├── ai2d.yaml │ │ │ └── utils.py │ │ ├── mmvet │ │ │ └── mmvet.yaml │ │ ├── pope │ │ │ └── pope.yaml │ │ ├── mme │ │ │ └── mme.yaml │ │ ├── llava-bench-coco │ │ │ └── llava-bench-coco.yaml │ │ ├── ferret │ │ │ └── ferret.yaml │ │ ├── llava-in-the-wild │ │ │ └── llava-in-the-wild.yaml │ │ ├── hallusion_bench │ │ │ └── hallusion_bench_image.yaml │ │ └── seedbench_2 │ │ │ ├── seedbench_2.yaml │ │ │ └── utils.py │ ├── models │ │ └── __init__.py │ └── filters │ │ ├── decontamination.py │ │ ├── __init__.py │ │ ├── transformation.py │ │ ├── selection.py │ │ └── extraction.py ├── eagle │ ├── model │ │ ├── language_model │ │ │ └── __init__.py │ │ ├── multimodal_encoder │ │ │ ├── __init__.py │ │ │ ├── vision_models │ │ │ │ └── __init__.py │ │ │ └── builder.py │ │ ├── multimodal_projector │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── __init__.py │ │ └── consolidate.py │ ├── __init__.py │ └── constants.py ├── Eagle.pdf ├── assets │ ├── Eagle.png │ ├── Logo.png │ ├── nvidia.jpeg │ ├── eagle-logo.png │ ├── fig-teaser.jpg │ ├── georgia-tech.jpeg │ ├── visual │ │ ├── AV1.png │ │ ├── AV2.png │ │ ├── Doc1.png │ │ ├── Doc2.png │ │ ├── Doc3.png │ │ ├── VQA1.png │ │ ├── VQA2.png │ │ └── VQA3.png │ ├── animal-compare.png │ ├── health-insurance.png │ └── leasing-apartment.png ├── train_mem.py ├── scripts │ ├── convert_mmvet_for_eval.py │ ├── eval_lmms_eval │ │ ├── eval-vizwiz-vqav2.sh │ │ ├── eval-mmbench-mathvista.sh │ │ └── eval-mme-seed-pope-sqa-gqa-ocrbench-textvqa-chartqa.sh │ ├── convert_gqa_for_eval.py │ ├── eval │ │ ├── mmmu.sh │ │ ├── pope.sh │ │ ├── textvqa.sh │ │ ├── vizwiz.sh │ │ ├── sqa.sh │ │ ├── mme.sh │ │ ├── vqav2.sh │ │ └── gqa.sh │ ├── zero2.json │ ├── zero3.json │ ├── convert_mmbench_for_submission.py │ ├── zero3_offload.json │ ├── convert_vizwiz_for_submission.py │ ├── pretrain-eagle-x4-vicuna-13b.sh │ ├── pretrain-eagle-x4-vicuna-7b.sh │ ├── pretrain-eagle-x5-vicuna-7b.sh │ ├── pretrain_eagle_x5_vicuna_7b.sh │ ├── pretrain-eagle-x5-vicuna-13b.sh │ ├── pretrain-eagle-x5-llama3-8b.sh │ ├── finetune-eagle-x4-vicuna-13b-1.8m.sh │ ├── finetune-eagle-x4-vicuna-7b-1.8m.sh │ ├── finetune-eagle-x5-vicuna-7b-1.8m.sh │ ├── finetune-eagle-x5-vicuna-13b-1.8m.sh │ ├── pretrain-eagle-x5-yi34b-cambrian.sh │ ├── finetune-eagle-x5-llama3-8b-1.8m.sh │ ├── finetune-eagle-x5-yi-34b-cambrian-7m.sh │ └── convert_vqav2_for_submission.py ├── setup.py └── requirements.txt ├── Eagle2_5 ├── eaglevl │ ├── model │ │ ├── __init__.py │ │ └── eagle2_5 │ │ │ └── __init__.py │ ├── train │ │ ├── __init__.py │ │ └── constants.py │ ├── sp_utils │ │ ├── __init__.py │ │ ├── ring │ │ │ └── __init__.py │ │ └── attention.py │ └── patch │ │ ├── __init__.py │ │ └── fused_monkey_patch.py ├── Eagle2.pdf ├── Eagle2.5.pdf ├── streamlit_demo │ ├── gallery │ │ └── prod_1.jpeg │ ├── static │ │ └── SimHei.ttf │ ├── .streamlit │ │ └── config.toml │ ├── 
start_demo.sh │ ├── lasting_demo.sh │ ├── constants.py │ ├── forward_port.sh │ └── sd_worker.py ├── document │ ├── 4.streamlit_demo.md │ ├── 0.onboarding.md │ ├── how_to_use_lmdb_to_read_images.md │ ├── 1.installing.md │ └── 3.training.md ├── shell │ └── submit_prepare_job.sh ├── deepspeed_configs │ ├── zero_stage1_config_nooptim.json │ ├── zero_stage2_config.json │ ├── zero_stage1_config.json │ └── zero_stage3_config.json ├── deployment │ └── setup_x86.dockerfile ├── pyproject.toml └── README.md ├── .gitattributes └── .gitignore /Eagle/lmms_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/model/language_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_projector/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/_task_utils/gpt_eval_utils.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_encoder/vision_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import EagleLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /Eagle/Eagle.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/Eagle/HEAD/Eagle/Eagle.pdf -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/_ok_vqa.yaml: -------------------------------------------------------------------------------- 1 | group: ok_vqa 2 | task: 3 | - ok_vqa_val2014 -------------------------------------------------------------------------------- /Eagle2_5/Eagle2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/Eagle/HEAD/Eagle2_5/Eagle2.pdf -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/flickr30k/flickr30k.yaml: 
-------------------------------------------------------------------------------- 1 | group: flickr30k 2 | task: 3 | - flickr30k_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmmu/mmmu.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu 2 | task: 3 | - mmmu_val 4 | - mmmu_test 5 | -------------------------------------------------------------------------------- /Eagle2_5/Eagle2.5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/Eagle/HEAD/Eagle2_5/Eagle2.5.pdf -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/docvqa.yaml: -------------------------------------------------------------------------------- 1 | group: docvqa 2 | task: 3 | - docvqa_val 4 | - docvqa_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vqav2/_vqav2.yaml: -------------------------------------------------------------------------------- 1 | group: vqav2 2 | task: 3 | - vqav2_val 4 | - vqav2_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/cmmmu/_cmmmu.yaml: -------------------------------------------------------------------------------- 1 | group: cmmmu 2 | task: 3 | - cmmmu_val 4 | - cmmmu_test 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/iconqa.yaml: -------------------------------------------------------------------------------- 1 | group: iconqa 2 | task: 3 | - iconqa_val 4 | - iconqa_test 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/nocaps/nocaps.yaml: -------------------------------------------------------------------------------- 1 | group : nocaps 2 | task: 3 | - nocaps_test 4 | - nocaps_val -------------------------------------------------------------------------------- /Eagle/eagle/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .language_model.eagle_llama import EagleLlamaForCausalLM, EagleConfig 2 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/infovqa.yaml: -------------------------------------------------------------------------------- 1 | group: infovqa 2 | task: 3 | - infovqa_val 4 | - infovqa_test 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/_textvqa.yaml: -------------------------------------------------------------------------------- 1 | group: textvqa 2 | task: 3 | - textvqa_val 4 | - textvqa_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/textcaps.yaml: -------------------------------------------------------------------------------- 1 | group : textcaps 2 | task: 3 | - textcaps_val 4 | - textcaps_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/scienceqa/scienceqa_full.yaml: -------------------------------------------------------------------------------- 1 | group: scienceqa_full 2 | task: 3 | - scienceqa 4 | - scienceqa_img -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml: 
-------------------------------------------------------------------------------- 1 | group: vizwiz_vqa 2 | task: 3 | - vizwiz_vqa_val 4 | - vizwiz_vqa_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/iconqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "iconqa_test" 2 | test_split: test 3 | include: _default_template_docvqa_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/iconqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "iconqa_val" 2 | test_split: val 3 | include: _default_template_docvqa_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/multidocvqa/multidocvqa.yaml: -------------------------------------------------------------------------------- 1 | group: multidocvqa 2 | task: 3 | - multidocvqa_val 4 | - multidocvqa_test 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2014_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco2014_cap 2 | task: 3 | - coco2014_cap_val 4 | - coco2014_cap_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2017_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco2017_cap 2 | task: 3 | - coco2017_cap_val 4 | - coco2017_cap_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml: -------------------------------------------------------------------------------- 1 | group: ok_vqa 2 | task: ok_vqa_val2014 3 | test_split: val2014 4 | include: _default_template_vqa_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_val 3 | include: _default_template_seg_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_val 3 | test_split: val 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_seg_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_test 3 | test_split: test 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_val 3 | test_split: val 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/refcocog_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_seg 2 | task: refcocog_seg_val 3 | 
include: _default_template_seg_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /Eagle/assets/Eagle.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:486012132ba08a5661c3ceaa45916e5acfaf863d495635010abc6a31f8f25aa1 3 | size 1829682 4 | -------------------------------------------------------------------------------- /Eagle/assets/Logo.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:15c5c3fe6143e7b6f11a2ca9f58a63e3803a8fa1c521bc0c4eb86c935983ad45 3 | size 1415072 4 | -------------------------------------------------------------------------------- /Eagle/assets/nvidia.jpeg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:77e41558cd89d3854d685b7f61e13d11ccf6564bab86655eaa278823f86f0a0e 3 | size 790457 4 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_val 3 | include: _default_template_bbox_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_test 3 | test_split: test 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_testA 3 | test_split: testA 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_testB 3 | test_split: testB 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox 2 | task: refcocog_bbox_val 3 | include: _default_template_bbox_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/refcocog_seg_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_seg 2 | task: refcocog_seg_test 3 | include: _default_template_seg_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /Eagle/assets/eagle-logo.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f21888d107fd09b1b09b8bcfaf36d5ed38230b21ca40ff32f7e4d349b2d30333 3 | size 534690 4 | -------------------------------------------------------------------------------- /Eagle/assets/fig-teaser.jpg: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0b1cf45b98f41b4ec65d0da7b0b913ea9566f0da7d06966a5d1e6147f60759a9 3 | size 483062 4 | -------------------------------------------------------------------------------- /Eagle/assets/georgia-tech.jpeg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e35f5652aa4010d4c00547805deaab68a0ea7e1d8957503d4e64c19e2fcfdde4 3 | size 315669 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/AV1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3bd72cc5f0f3480dc18fcc5d9f7cf4bfa73ebac3281380fe2858c898944d9ba3 3 | size 898361 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/AV2.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c21c5562ffe207fbd37a3efc7ba40eae4e4584a29a188d5095b0df649691f4ed 3 | size 961106 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/Doc1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8738c6297fa86cf857e2199404df506c20a5ab70bce3c18f220e8a28c1565656 3 | size 156814 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/Doc2.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dd8b8ba8bbf5d5f179b12c0f9dd3a73c967e2e33ecf070df6905667febd30e26 3 | size 128759 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/Doc3.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:58ec04202ccdb3483286aaa08fa230242e95a30baac4e7c45888b26bb6330dc4 3 | size 224869 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/VQA1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dc33fe4a96a9993b057d99fb096e958ef02dd8cd1ebd1827ce2d81a6fdf770bb 3 | size 820803 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/VQA2.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ca807eef10fa03e58a8758e7abe32cbc1440fc7f38f3dbbae5474a894c54d50c 3 | size 1250156 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/VQA3.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1203f3d32776a43ab38e6408aeaa3ebd8422205a6994b1c03174fa0a1a8d88ac 3 | size 228453 4 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: 
refcoco+_bbox_testA 3 | include: _default_template_bbox_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_testB 3 | include: _default_template_bbox_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_testA 3 | include: _default_template_seg_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_testB 3 | include: _default_template_seg_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_testA 3 | test_split: testA 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_testB 3 | test_split: testB 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog 2 | task: 3 | - refcocog_seg_test 4 | - refcocog_seg_val 5 | - refcocog_bbox_test 6 | - refcocog_bbox_val 7 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox 2 | task: refcocog_bbox_test 3 | include: _default_template_bbox_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /Eagle/assets/animal-compare.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fa869d21fbdc6c252eac439a4be0c1716d2b7a0d79a1f5705979e85b7ea9328c 3 | size 926281 4 | -------------------------------------------------------------------------------- /Eagle/assets/health-insurance.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a78b5d02c6936911850eb8410c0c6eb413cf3bc108b58d762d011855766a97af 3 | size 81714 4 | -------------------------------------------------------------------------------- /Eagle/assets/leasing-apartment.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:28011d0ed0f7c291a5f44b9756a591abe019004678238db99295dbcdbfcbbbe2 3 | size 193012 4 | 
-------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco_cap 2 | task: 3 | - coco2014_cap_val 4 | - coco2014_cap_test 5 | - coco2017_cap_val 6 | - coco2017_cap_test 7 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/nocaps/_default_template_nocaps_yaml: -------------------------------------------------------------------------------- 1 | model_specific_prompt_kwargs: 2 | default: 3 | prompt: "Provide a one-sentence caption for the provided image." -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/_default_template_textcaps_yaml: -------------------------------------------------------------------------------- 1 | model_specific_prompt_kwargs: 2 | default: 3 | prompt: Provide a one-sentence caption for the provided image. -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/olympiadbench/olympiadbench.yaml: -------------------------------------------------------------------------------- 1 | group: olympiadbench 2 | task: 3 | - olympiadbench_test_en 4 | - olympiadbench_test_cn 5 | metadata: 6 | - version: 0.0 7 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/gallery/prod_1.jpeg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a1cce0deb560d17dd8fc55e8f6f7ca3bf8ed4ff558ca98b86616a8b32e855b73 3 | size 48925 4 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/static/SimHei.ttf: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:336a838f4a78e150826be608dae69de59d50948c3d2b71760e096ae764154bdc 3 | size 9751960 4 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | primaryColor="#F63366" 3 | backgroundColor="#FFFFFF" 4 | secondaryBackgroundColor="#F0F2F6" 5 | textColor="#262730" 6 | font="sans serif" 7 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+ 2 | task: 3 | - refcoco+_seg_val 4 | - refcoco+_seg_testA 5 | - refcoco+_seg_testB 6 | - refcoco+_bbox_val 7 | - refcoco+_bbox_testA 8 | - refcoco+_bbox_testB 9 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/docvqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "docvqa_val" 2 | test_split: validation 3 | metric_list: 4 | - metric: anls 5 | aggregation: mean 6 | higher_is_better: true 7 | include: _default_template_docvqa_yaml 8 | -------------------------------------------------------------------------------- /Eagle2_5/document/4.streamlit_demo.md: -------------------------------------------------------------------------------- 1 | 2 | # Streamlit demo 3 | 4 | We provide a Streamlit demo: 5 | 6 | ``` 7 | cd Eagle/Eagle2_5/streamlit_demo 8 | bash
start_demo.sh 9 | # open demo http://localhost:8501 10 | ``` 11 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/start_demo.sh: -------------------------------------------------------------------------------- 1 | python controller.py & 2 | python model_worker.py --model-path nvidia/Eagle2.5-8B \ 3 | --model-name Eagle2_5 --port 6214 --worker-address http://127.0.0.1:6214 & 4 | streamlit run app.py 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_cn.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench_cn 2 | task: 3 | - mmbench_cn_dev 4 | - mmbench_cn_test 5 | - mmbench_cn_cc 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 9 | sys_prompt: "有如下几个选项:" -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_en.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench_en 2 | task: 3 | - mmbench_en_dev 4 | - mmbench_en_test 5 | metadata: 6 | version: 0.0 7 | sys_prompt: "There are several options:" 8 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 9 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/model/eagle2_5/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_eagle2_5_vl import Eagle2_5_VLConfig 2 | from .modeling_eagle2_5_vl import Eagle2_5_VLForConditionalGeneration 3 | 4 | __all__ = ['Eagle2_5_VLConfig', 'Eagle2_5_VLForConditionalGeneration'] 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/infovqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "infovqa_val" 2 | test_split: validation 3 | output_type: generate_until 4 | metric_list: 5 | - metric: anls 6 | aggregation: mean 7 | higher_is_better: true 8 | include: _default_template_infovqa_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco 2 | task: 3 | - refcoco_seg_test 4 | - refcoco_seg_val 5 | - refcoco_seg_testA 6 | - refcoco_seg_testB 7 | - refcoco_bbox_test 8 | - refcoco_bbox_val 9 | - refcoco_bbox_testA 10 | - refcoco_bbox_testB 11 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/textvqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: textvqa_test 2 | test_split: test 3 | metric_list: 4 | - metric: submission 5 | aggregation: !function utils.textvqa_aggreate_submissions 6 | higher_is_better: true 7 | include: _default_template_textvqa_yaml 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.jpg filter=lfs diff=lfs merge=lfs -text 2 | *.jpeg filter=lfs diff=lfs merge=lfs -text 3 | *.png filter=lfs diff=lfs merge=lfs -text 4 | *.ttf filter=lfs diff=lfs merge=lfs -text 5 | *.pt filter=lfs diff=lfs merge=lfs -text 6 | *.bin filter=lfs diff=lfs merge=lfs -text 7 | -------------------------------------------------------------------------------- 
/Eagle/lmms_eval/tasks/mmbench/mmbench_cn_test.yaml: -------------------------------------------------------------------------------- 1 | task: mmbench_cn_test 2 | test_split: test 3 | metric_list: 4 | - metric: submission 5 | aggregation: !function cn_utils.mmbench_aggregate_test_results 6 | higher_is_better: true 7 | include: _default_template_mmbench_cn_yaml 8 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_en_test.yaml: -------------------------------------------------------------------------------- 1 | task: "mmbench_en_test" 2 | test_split: test 3 | include: _default_template_mmbench_en_yaml 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function en_utils.mmbench_aggregate_test_results 7 | higher_is_better: true 8 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/_task_utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def generate_submission_file(file_name, args, subpath="submissions"): 5 | path = os.path.join(args.output_path, subpath) 6 | os.makedirs(path, exist_ok=True) 7 | path = os.path.join(path, file_name) 8 | return os.path.abspath(path) 9 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench 2 | task: 3 | - mmbench_en_dev 4 | - mmbench_en_test 5 | - mmbench_cn_dev 6 | - mmbench_cn_test 7 | - mmbench_cn_cc 8 | metadata: 9 | version: 0.0 10 | sys_prompt: "There are several options:" 11 | gpt_eval_model_name: "gpt-3.5-turbo-0613" -------------------------------------------------------------------------------- /Eagle/train_mem.py: -------------------------------------------------------------------------------- 1 | try: 2 | import transformer_engine 3 | import transformer_engine_extensions 4 | except: 5 | print("having trouble importing transformer-engine!") 6 | 7 | from train import train 8 | 9 | if __name__ == "__main__": 10 | train(attn_implementation="flash_attention_2") 11 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/cmmmu/_default_template_cmmmu_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/CMMMU 2 | output_type: generate_until 3 | doc_to_visual: !function utils.cmmmu_doc_to_visual 4 | doc_to_text: !function utils.cmmmu_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | max_new_tokens: 16 8 | image_aspect_ratio: original -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mathvista/mathvista.yaml: -------------------------------------------------------------------------------- 1 | group: mathvista 2 | task: 3 | - mathvista_testmini 4 | - mathvista_test 5 | metadata: 6 | version: 0.0 7 | # gpt_eval_model_name: "gpt-4-0613" 8 | # gpt_eval_model_name: "gpt-4o-2024-05-13" 9 | gpt_eval_model_name: "gpt-4-0125-preview" 10 | quick_extract: false -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vqav2/vqav2_test.yaml: -------------------------------------------------------------------------------- 1 | task: "vqav2_test" 2 | include: _default_template_vqav2_yaml 3 | test_split: test 4 | metric_list: 5 | - metric: submission 6 | aggregation: 
!function utils.vqav2_aggreate_submissions 7 | higher_is_better: true 8 | process_results: !function utils.vqav2_process_results_test 9 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/docvqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "docvqa_test" 2 | test_split: test 3 | process_results: !function utils.docvqa_test_process_results 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function utils.docvqa_test_aggregate_results 7 | higher_is_better: true 8 | include: _default_template_docvqa_yaml 9 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/sp_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .globals import get_pg_manager, set_pg_manager 2 | from .input_utils import split_for_sequence_parallel, gather_from_sequence_parallel, ring_split_for_sequence_parallel, ring_gather_for_sequence_parallel 3 | from .attention import pre_process_for_sequence_parallel_attn, post_process_for_sequence_parallel_attn -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vqav2/vqav2_val.yaml: -------------------------------------------------------------------------------- 1 | task: "vqav2_val" 2 | include: _default_template_vqav2_yaml 3 | test_split: validation 4 | metric_list: 5 | - metric: exact_match 6 | aggregation: mean 7 | higher_is_better: true 8 | ignore_case: true 9 | ignore_punctuation: true 10 | process_results: !function utils.vqav2_process_results_val 11 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/infovqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "infovqa_test" 2 | test_split: test 3 | output_type: generate_until 4 | process_results: !function utils.infovqa_test_process_results 5 | metric_list: 6 | - metric: submission 7 | aggregation: !function utils.infovqa_test_aggregate_results 8 | higher_is_better: true 9 | include: _default_template_infovqa_yaml 10 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/models/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | AVAILABLE_MODELS = { 4 | "eagle": "Eagle", 5 | } 6 | 7 | for model_name, model_class in AVAILABLE_MODELS.items(): 8 | try: 9 | exec(f"from .{model_name} import {model_class}") 10 | except ImportError: 11 | pass 12 | 13 | 14 | import hf_transfer 15 | 16 | os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" 17 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_cn_dev.yaml: -------------------------------------------------------------------------------- 1 | task: "mmbench_cn_dev" 2 | test_split: "dev" 3 | metric_list: 4 | - metric: gpt_eval_score 5 | aggregation: !function cn_utils.mmbench_aggregate_dev_results_eval 6 | higher_is_better: true 7 | - metric: submission 8 | higher_is_better: true 9 | aggregation: !function cn_utils.mmbench_aggregate_dev_results 10 | include: _default_template_mmbench_cn_yaml 11 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/textvqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: textvqa_val 2 | 
test_split: validation 3 | metric_list: 4 | - metric: exact_match 5 | aggregation: mean 6 | higher_is_better: true 7 | ignore_case: true 8 | ignore_punctuation: true 9 | - metric: submission 10 | aggregation: !function utils.textvqa_aggreate_submissions 11 | higher_is_better: true 12 | include: _default_template_textvqa_yaml 13 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_en_dev.yaml: -------------------------------------------------------------------------------- 1 | task: "mmbench_en_dev" 2 | test_split: dev 3 | include: _default_template_mmbench_en_yaml 4 | metric_list: 5 | - metric: gpt_eval_score 6 | aggregation: !function en_utils.mmbench_aggregate_dev_results_eval 7 | higher_is_better: true 8 | - metric: submission 9 | aggregation: !function en_utils.mmbench_aggregate_dev_results_submission 10 | higher_is_better: true -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/lasting_demo.sh: -------------------------------------------------------------------------------- 1 | 2 | submit_job --gpu 8 --tasks_per_node 1 --nodes 1 -n experiment --image /home/zhidingy/workspace/eagle2/torch2_test.sqsh \ 3 | --logroot workdir_lasting_demo_short \ 4 | --email_mode never \ 5 | --partition adlr_services \ 6 | --duration 0 \ 7 | --dependent_clones 0 \ 8 | -c "cd /home/zhidingy/workspace/eagle-video/streamlit_demo; bash start_demo.sh" 9 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val.yaml: -------------------------------------------------------------------------------- 1 | group: vizwiz_vqa 2 | task: vizwiz_vqa_val 3 | test_split: val 4 | include: _default_template_vqa_yaml 5 | metric_list: 6 | - metric: exact_match 7 | aggregation: mean 8 | higher_is_better: true 9 | ignore_case: true 10 | ignore_punctuation: true 11 | # - metric: submission 12 | # aggregation: !function utils.vizwiz_vqa_aggreate_submissions 13 | # higher_is_better: true -------------------------------------------------------------------------------- /Eagle/eagle/constants.py: -------------------------------------------------------------------------------- 1 | # This file is from https://github.com/haotian-liu/LLaVA/ 2 | 3 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 4 | WORKER_HEART_BEAT_INTERVAL = 15 5 | 6 | LOGDIR = "." 
7 | 8 | # Model Constants 9 | IGNORE_INDEX = -100 10 | IMAGE_TOKEN_INDEX = -200 11 | DEFAULT_IMAGE_TOKEN = "" 12 | DEFAULT_IMAGE_PATCH_TOKEN = "" 13 | DEFAULT_IM_START_TOKEN = "" 14 | DEFAULT_IM_END_TOKEN = "" 15 | IMAGE_PLACEHOLDER = "" 16 | -------------------------------------------------------------------------------- /Eagle/scripts/convert_mmvet_for_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--src", type=str) 7 | parser.add_argument("--dst", type=str) 8 | args = parser.parse_args() 9 | 10 | cur_result = {} 11 | 12 | for line in open(args.src): 13 | data = json.loads(line) 14 | qid = data['question_id'] 15 | cur_result[f'v1_{qid}'] = data['text'] 16 | 17 | with open(args.dst, 'w') as f: 18 | json.dump(cur_result, f, indent=2) 19 | -------------------------------------------------------------------------------- /Eagle/scripts/eval_lmms_eval/eval-vizwiz-vqav2.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=$1 2 | MODEL_NAME=$2 3 | CONV_MODE=$3 4 | 5 | accelerate launch --num_processes=8\ 6 | evaluate_lmms_eval.py \ 7 | --model eagle \ 8 | --model_args pretrained=${MODEL_PATH},conv_template=${CONV_MODE} \ 9 | --tasks vizwiz_vqa_test,vqav2_test \ 10 | --batch_size 1 \ 11 | --log_samples \ 12 | --log_samples_suffix ${MODEL_NAME}_vizwiz_vqav2 \ 13 | --output_path ./logs/ -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vqav2/_default_template_vqav2_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/VQAv2 2 | dataset_kwargs: 3 | token: True 4 | output_type: generate_until 5 | doc_to_visual: !function utils.vqav2_doc_to_visual 6 | doc_to_text: !function utils.vqav2_doc_to_text 7 | doc_to_target: "answer" 8 | generation_kwargs: 9 | max_new_tokens: 16 10 | metadata: 11 | - version: 0.0 12 | model_specific_prompt_kwargs: 13 | default: 14 | pre_prompt: "" 15 | post_prompt: "\nAnswer the question using a single word or phrase." 
-------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_test.yaml: -------------------------------------------------------------------------------- 1 | group: vizwiz_vqa 2 | task: vizwiz_vqa_test 3 | test_split: test 4 | include: _default_template_vqa_yaml 5 | process_results: !function utils.vizwiz_vqa_process_results 6 | metric_list: 7 | # - metric: exact_match 8 | # aggregation: mean 9 | # higher_is_better: true 10 | # ignore_case: true 11 | # ignore_punctuation: true 12 | - metric: submission 13 | aggregation: !function utils.vizwiz_vqa_aggreate_submissions 14 | higher_is_better: true 15 | -------------------------------------------------------------------------------- /Eagle/scripts/eval_lmms_eval/eval-mmbench-mathvista.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=$1 2 | MODEL_NAME=$2 3 | CONV_MODE=$3 4 | 5 | accelerate launch --num_processes=8\ 6 | evaluate_lmms_eval.py \ 7 | --model eagle \ 8 | --model_args pretrained=${MODEL_PATH},conv_template=${CONV_MODE} \ 9 | --tasks mmbench_en_dev,mathvista_testmini \ 10 | --batch_size 1 \ 11 | --log_samples \ 12 | --log_samples_suffix ${MODEL_NAME}_mmbench_mathvista \ 13 | --output_path ./logs/ -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/_default_template_infovqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/DocVQA 2 | dataset_name: InfographicVQA 3 | dataset_kwargs: 4 | token: True 5 | doc_to_target: "answers" 6 | doc_to_visual: !function utils.infovqa_doc_to_visual 7 | doc_to_text: !function utils.infovqa_doc_to_text 8 | generation_kwargs: 9 | max_new_tokens: 32 10 | temperature: 0 11 | do_sample: False 12 | model_specific_prompt_kwargs: 13 | default: 14 | pre_prompt: "" 15 | post_prompt: "\nAnswer the question using a single word or phrase." 
-------------------------------------------------------------------------------- /Eagle/scripts/convert_gqa_for_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--src", type=str) 7 | parser.add_argument("--dst", type=str) 8 | args = parser.parse_args() 9 | 10 | all_answers = [] 11 | for line_idx, line in enumerate(open(args.src)): 12 | res = json.loads(line) 13 | question_id = res['question_id'] 14 | text = res['text'].rstrip('.').lower() 15 | all_answers.append({"questionId": question_id, "prediction": text}) 16 | 17 | with open(args.dst, 'w') as f: 18 | json.dump(all_answers, f) 19 | -------------------------------------------------------------------------------- /Eagle/scripts/eval_lmms_eval/eval-mme-seed-pope-sqa-gqa-ocrbench-textvqa-chartqa.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=$1 2 | MODEL_NAME=$2 3 | CONV_MODE=$3 4 | 5 | accelerate launch --num_processes=8\ 6 | evaluate_lmms_eval.py \ 7 | --model eagle \ 8 | --model_args pretrained=${MODEL_PATH},conv_template=${CONV_MODE} \ 9 | --tasks mme,seed_bench,pope,scienceqa_img,gqa,ocrbench,textvqa_val,chartqa \ 10 | --batch_size 1 \ 11 | --log_samples \ 12 | --log_samples_suffix ${MODEL_NAME}_mmbench_mathvista_seedbench \ 13 | --output_path ./logs/ -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/cmmmu/cmmmu_test.yaml: -------------------------------------------------------------------------------- 1 | task: "cmmmu_test" 2 | test_split: test 3 | # The return value of process_results will be used by metrics 4 | process_results: !function utils.cmmmu_process_test_results_for_submission 5 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 6 | metric_list: 7 | - metric: submission 8 | aggregation: !function utils.cmmmu_test_aggregate_results_for_submission 9 | higher_is_better: false 10 | metadata: 11 | - version: 0.0 12 | include: _default_template_cmmmu_yaml 13 | -------------------------------------------------------------------------------- /Eagle/scripts/eval/mmmu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | MODEL_CKPT=$1 4 | MODEL_NAME=$2 5 | 6 | SAVE_DIR=playground/data/eval/mmmu/${MODEL_NAME} 7 | SPLIT=validation 8 | MMMU_DATA_ROOT=./playground/data/eval/MMMU 9 | 10 | python eagle/eval/model_vqa_mmmu.py \ 11 | --model_path ${MODEL_CKPT} \ 12 | --split ${SPLIT} \ 13 | --output_path ${SAVE_DIR}/${SPLIT}_output.json \ 14 | 15 | output_file=${SAVE_DIR}/${SPLIT}_output.json 16 | echo "saving model answer at $output_file" 17 | 18 | python ./eval_utils/mmmu/main_eval_only.py --output_path ${SAVE_DIR}/${SPLIT}_output.json -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/constants.py: -------------------------------------------------------------------------------- 1 | # This file is adopted from the InternVL project 2 | # (https://github.com/OpenGVLab/InternVL), licensed under the MIT License. 
3 | # 4 | # -------------------------------------------------------- 5 | # InternVL 6 | # Copyright (c) 2023 OpenGVLab 7 | # Licensed under The MIT License 8 | # -------------------------------------------------------- 9 | 10 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 11 | WORKER_HEART_BEAT_INTERVAL = 15 12 | 13 | LOGDIR = 'logs/' 14 | 15 | server_error_msg = '**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**' 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__ 3 | *.pyc 4 | *.egg-info 5 | dist 6 | 7 | # Log 8 | *.log 9 | *.log.* 10 | logs/ 11 | # *.json 12 | *.jsonl 13 | images/* 14 | 15 | # Data 16 | !**/alpaca-data-conversation.json 17 | 18 | # Editor 19 | .idea 20 | *.swp 21 | .github 22 | .vscode 23 | 24 | # Other 25 | .DS_Store 26 | wandb 27 | output 28 | examples/* 29 | 30 | eagle/eval/table/*.json 31 | eagle/eval/table/results/*.json 32 | checkpoints 33 | ckpts* 34 | slurm_logs/* 35 | 36 | datasets/* 37 | playground/* 38 | gradio_tmp/* 39 | 40 | .ipynb_checkpoints 41 | *.ipynb 42 | 43 | unit_test*.* 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/_default_template_textvqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/textvqa 2 | output_type: generate_until 3 | doc_to_visual: !function utils.textvqa_doc_to_visual 4 | doc_to_text: !function utils.textvqa_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.textvqa_process_results 10 | model_specific_prompt_kwargs: 11 | default: 12 | pre_prompt: "" 13 | post_prompt: "\nAnswer the question using a single word or phrase." 14 | ocr: true 15 | qwen_vl: 16 | pre_prompt: "" 17 | post_prompt: " Answer:" 18 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench/seedbench_ppl.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/SEED-Bench 2 | dataset_kwargs: 3 | token: True 4 | task: "seedbench_ppl" 5 | test_split: test 6 | output_type: multiple_choice 7 | doc_to_visual: !function utils.seed_doc_to_visual 8 | doc_to_text: !function utils.seed_doc_to_text_mc 9 | doc_to_choice : !function utils.seed_doc_to_choice 10 | doc_to_target: !function utils.seed_doc_to_mc_target 11 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 12 | metric_list: 13 | - metric: acc 14 | metadata: 15 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/_default_template_docvqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/DocVQA 2 | dataset_name: DocVQA 3 | dataset_kwargs: 4 | token: True 5 | output_type: generate_until 6 | doc_to_visual: !function utils.docvqa_doc_to_visual 7 | doc_to_text: !function utils.docvqa_doc_to_text 8 | doc_to_target: "answers" 9 | generation_kwargs: 10 | max_new_tokens: 32 11 | temperature: 0 12 | do_sample: False 13 | model_specific_prompt_kwargs: 14 | default: 15 | pre_prompt: "" 16 | post_prompt: "\nAnswer the question using a single word or phrase." 
17 | qwen_vl: 18 | pre_prompt: "" 19 | post_prompt: " Answer:" 20 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/cmmmu/cmmmu_val.yaml: -------------------------------------------------------------------------------- 1 | task: "cmmmu_val" 2 | test_split: val 3 | # The return value of process_results will be used by metrics 4 | process_results: !function utils.cmmmu_process_results 5 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 6 | generation_kwargs: 7 | max_new_tokens: 16 8 | image_aspect_ratio: original 9 | metric_list: 10 | - metric: cmmmu_acc 11 | aggregation: !function utils.cmmmu_aggregate_results 12 | higher_is_better: true 13 | metadata: 14 | - version: 0.0 15 | include: _default_template_cmmmu_yaml 16 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/_default_template_vqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/VizWiz-VQA 2 | output_type: generate_until 3 | doc_to_visual: !function utils.vizwiz_vqa_doc_to_visual 4 | doc_to_text: !function utils.vizwiz_vqa_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | metadata: 10 | - version: 0.0 11 | model_specific_prompt_kwargs: 12 | default: 13 | pre_prompt: "" 14 | post_prompt: "\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase." 15 | process_results: !function utils.vizwiz_vqa_process_results 16 | -------------------------------------------------------------------------------- /Eagle/scripts/eval/pope.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | 5 | python -m eagle.eval.model_vqa_loader \ 6 | --model-path $CKPT \ 7 | --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \ 8 | --image-folder ./playground/data/eval/pope/val2014 \ 9 | --answers-file ./playground/data/eval/pope/answers/${NAME}.jsonl \ 10 | --temperature 0 \ 11 | --conv-mode vicuna_v1 12 | 13 | python eagle/eval/eval_pope.py \ 14 | --annotation-dir ./playground/data/eval/pope/coco \ 15 | --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \ 16 | --result-file ./playground/data/eval/pope/answers/${NAME}.jsonl 17 | -------------------------------------------------------------------------------- /Eagle/scripts/zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "train_micro_batch_size_per_gpu": "auto", 14 | "train_batch_size": "auto", 15 | "gradient_accumulation_steps": "auto", 16 | "zero_optimization": { 17 | "stage": 2, 18 | "overlap_comm": false, 19 | "contiguous_gradients": true, 20 | "sub_group_size": 1e9, 21 | "reduce_bucket_size": "auto" 22 | } 23 | } -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/stvqa/stvqa.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ST-VQA 2 | task: "stvqa" 3 | test_split: test 4 | output_type: generate_until 5 | doc_to_visual: !function 
utils.stvqa_doc_to_visual 6 | doc_to_text: !function utils.stvqa_doc_to_text 7 | doc_to_target: "answers" 8 | generation_kwargs: 9 | max_new_tokens: 32 10 | temperature: 0 11 | do_sample: False 12 | process_results: !function utils.stvqa_process_results 13 | metric_list: 14 | - metric: submission 15 | aggregation: !function utils.stvqa_aggregate_submissions 16 | model_specific_prompt_kwargs: 17 | default: 18 | pre_prompt: "" 19 | post_prompt: "\nAnswer the question using a single word or phrase." 20 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ocrbench/ocrbench.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: echo840/OCRBench 2 | dataset_kwargs: 3 | token: True 4 | task: "ocrbench" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.ocrbench_doc_to_visual 8 | doc_to_text: !function utils.ocrbench_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 128 12 | temperature: 0 13 | top_p: 0 14 | num_beams: 1 15 | do_sample: false 16 | process_results: !function utils.ocrbench_process_results 17 | metric_list: 18 | - metric: ocrbench_accuracy 19 | aggregation: !function utils.ocrbench_aggregate_accuracy 20 | higher_is_better: true 21 | metadata: 22 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/scripts/eval/textvqa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | DATA_ROOT=$(readlink -f "./playground/data/eval/textvqa/") 5 | 6 | python -m eagle.eval.model_vqa_loader \ 7 | --model-path $CKPT \ 8 | --question-file ./playground/data/eval/textvqa/llava_textvqa_val_v051_ocr.jsonl \ 9 | --image-folder ./playground/data/eval/textvqa/train_images \ 10 | --answers-file ./playground/data/eval/textvqa/answers/${NAME}.jsonl \ 11 | --temperature 0 \ 12 | --conv-mode vicuna_v1 13 | 14 | python -m eagle.eval.eval_textvqa \ 15 | --annotation-file ./playground/data/eval/textvqa/TextVQA_0.5.1_val.json \ 16 | --result-file ./playground/data/eval/textvqa/answers/${NAME}.jsonl 17 | -------------------------------------------------------------------------------- /Eagle/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="eagle", 5 | version="0.0.1", 6 | packages=find_packages(include=["eagle", "eagle.*"]), 7 | package_data={ 8 | "eagle": ["*"], 9 | "eagle.model": ["*"], 10 | "eagle.model.language_model": ["*"], 11 | }, 12 | py_modules=[ 13 | "eagle.conversation", 14 | "eagle.constants", 15 | "eagle.model.builder", 16 | "eagle.model.language_model", 17 | "eagle.utils", 18 | "eagle.mm_utils", 19 | ], 20 | install_requires=[ 21 | # Add any dependencies required by the eagle module 22 | ], 23 | include_package_data=True, 24 | ) 25 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/_default_template_mmbench_cn_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMBench 2 | dataset_kwargs: 3 | token: True 4 | doc_to_target: "answer" 5 | dataset_name: "cn" 6 | output_type: generate_until 7 | doc_to_visual: !function cn_utils.mmbench_doc_to_visual 8 | doc_to_text: !function cn_utils.mmbench_doc_to_text 9 | generation_kwargs: 10 | max_new_tokens: 256 11 | temperature: 0 12 | 
top_p: 0 13 | num_beams: 1 14 | do_sample: false 15 | process_results: !function cn_utils.mmbench_process_results 16 | model_specific_prompt_kwargs: 17 | default: 18 | pre_prompt: "" 19 | post_prompt: "\n请直接使用所提供的选项字母作为答案回答。" 20 | model_specific_generation_kwargs: 21 | llava: 22 | image_aspect_ratio: original 23 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/patch/__init__.py: -------------------------------------------------------------------------------- 1 | from .pad_data_collator import pad_data_collator, get_collator 2 | from .train_sampler_patch import replace_train_sampler, replace_train_sampler_for_online_packing, OnlinePackingGroupedSampler 3 | from .fused_monkey_patch import replace_liger_fused_ops 4 | from .train_sampler_patch import Packer 5 | from .packing_attention import patch_packing_attention 6 | __all__ = ['replace_llama_attn_with_flash_attn', 7 | 'replace_llama2_attn_with_flash_attn', 8 | 'replace_train_sampler', 9 | 'replace_train_sampler_for_online_packing', 10 | 'OnlinePackingGroupedSampler', 11 | 'pad_data_collator', 12 | 'get_collator', 13 | 'replace_liger_fused_ops'] -------------------------------------------------------------------------------- /Eagle/scripts/eval/vizwiz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | DATA_ROOT=$(readlink -f "./playground/data/eval/vizwiz") 5 | LOCAL_ANSWER_DIR="./playground/data/eval_local_files/vizwiz" 6 | 7 | python -m eagle.eval.model_vqa_loader \ 8 | --model-path $CKPT \ 9 | --question-file $DATA_ROOT/llava_test.jsonl \ 10 | --image-folder $DATA_ROOT/test \ 11 | --answers-file $LOCAL_ANSWER_DIR/$NAME/$NAME.jsonl \ 12 | --temperature 0 \ 13 | --conv-mode vicuna_v1 14 | 15 | python scripts/convert_vizwiz_for_submission.py \ 16 | --annotation-file $DATA_ROOT/llava_test.jsonl \ 17 | --result-file $LOCAL_ANSWER_DIR/$NAME/$NAME.jsonl \ 18 | --result-upload-file $LOCAL_ANSWER_DIR/$NAME/answers_upload/vizwiz_test_$NAME.json 19 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/decontamination.py: -------------------------------------------------------------------------------- 1 | from lmms_eval.api.filter import Filter 2 | 3 | 4 | class DecontaminationFilter(Filter): 5 | """ 6 | A filter which evaluates 7 | """ 8 | 9 | name = "track_decontamination" 10 | 11 | def __init__(self, path) -> None: 12 | """ 13 | 14 | TODO: make sure only ever run one time on the train set (should this be cached as a class var? keyed by value for "path"). 
15 | should further cache result on a given (task_name, doc_id) 16 | """ 17 | self._decontam_results = None 18 | 19 | def apply(self, resps, docs) -> None: 20 | """ 21 | Return {"no_contamination", "only_contamination"} keys for the 2 different subsets 22 | """ 23 | pass 24 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/multidocvqa/multidocvqa_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MP-DocVQA 2 | task: "multidocvqa_test" 3 | test_split: test 4 | output_type: generate_until 5 | doc_to_visual: !function utils.multidocvqa_doc_to_visual 6 | doc_to_text: !function utils.multidocvqa_doc_to_text 7 | doc_to_target: "answers" 8 | generation_kwargs: 9 | max_new_tokens: 32 10 | temperature: 0 11 | do_sample: False 12 | process_results: !function utils.multidocvqa_process_test_results_for_submission 13 | metric_list: 14 | - metric: submission 15 | aggregation: !function utils.multidocvqa_test_aggregate_results_for_submission 16 | model_specific_prompt_kwargs: 17 | default: 18 | pre_prompt: "" 19 | post_prompt: "\nAnswer the question using a single word or phrase." 20 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmmu/mmmu_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMMU 2 | task: "mmmu_val" 3 | test_split: validation 4 | output_type: generate_until 5 | doc_to_visual: !function utils.mmmu_doc_to_visual 6 | doc_to_text: !function utils.mmmu_doc_to_text 7 | doc_to_target: "answer" 8 | # The return value of process_results will be used by metrics 9 | process_results: !function utils.mmmu_process_results 10 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | image_aspect_ratio: original 14 | metric_list: 15 | - metric: mmmu_acc 16 | aggregation: !function utils.mmmu_aggregate_results 17 | higher_is_better: true 18 | metadata: 19 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/forward_port.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Read the IP address from a file 5 | SERVER_IP=10.49.134.111 6 | # Validate the IP address format (optional) 7 | if [[ ! $SERVER_IP =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then 8 | echo "Error: Invalid IP address format: $SERVER_IP" 9 | exit 1 10 | fi 11 | 12 | # Target port and local listening port 13 | TARGET_PORT=8501 14 | LOCAL_PORT=9120 15 | 16 | # Print info 17 | echo "Setting up socat to forward local port $LOCAL_PORT to $SERVER_IP:$TARGET_PORT" 18 | 19 | # Start socat 20 | socat TCP-LISTEN:$LOCAL_PORT,fork TCP:$SERVER_IP:$TARGET_PORT 21 | # SOCAT_PID=$! 22 | 23 | # Print success message 24 | # echo "Socat is running with PID $SOCAT_PID. Forwarding $LOCAL_PORT to $SERVER_IP:$TARGET_PORT" 25 | 26 | # Optional: wait for the user to terminate manually 27 | # read -p "Press Enter to stop socat and exit..."
28 | 29 | # Optional: stop socat 30 | # kill $SOCAT_PID 31 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmmu/mmmu_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMMU 2 | task: "mmmu_test" 3 | test_split: test 4 | output_type: generate_until 5 | doc_to_visual: !function utils.mmmu_doc_to_visual 6 | doc_to_text: !function utils.mmmu_doc_to_text 7 | doc_to_target: "answer" 8 | # The return value of process_results will be used by metrics 9 | process_results: !function utils.mmmu_process_results 10 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | image_aspect_ratio: original 14 | metric_list: 15 | - metric: submission 16 | aggregation: !function utils.mmmu_test_aggregate_results_for_submission 17 | higher_is_better: true 18 | metadata: 19 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/scripts/eval/sqa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | 5 | python -m eagle.eval.model_vqa_science \ 6 | --model-path $CKPT \ 7 | --question-file ./playground/data/eval/scienceqa/llava_test_CQM-A.json \ 8 | --image-folder ./playground/data/eval/scienceqa/images/test \ 9 | --answers-file ./playground/data/eval/scienceqa/answers/${NAME}.jsonl \ 10 | --single-pred-prompt \ 11 | --temperature 0 \ 12 | --conv-mode vicuna_v1 13 | 14 | python eagle/eval/eval_science_qa.py \ 15 | --base-dir ./playground/data/eval/scienceqa \ 16 | --result-file ./playground/data/eval/scienceqa/answers/${NAME}.jsonl \ 17 | --output-file ./playground/data/eval/scienceqa/answers/${NAME}_output.jsonl \ 18 | --output-result ./playground/data/eval/scienceqa/answers/${NAME}_result.json 19 | -------------------------------------------------------------------------------- /Eagle2_5/shell/submit_prepare_job.sh: -------------------------------------------------------------------------------- 1 | set -a 2 | source .env 3 | set +a 4 | 5 | RECIPE_PATH=${1:-"local_playground/recipe/stage1.json"} 6 | NODES=${2:-1} 7 | LOG_DIR=${3:-"work_dirs/data_prepare"} 8 | TOKENIZER=${4:-"Qwen/Qwen3-1.7B"} 9 | LAUNCHER=${5:-"pytorch"} 10 | 11 | 12 | bash shell/prepare.sh ${RECIPE_PATH} ${NODES} ${LOG_DIR} ${TOKENIZER} ${LAUNCHER} 13 | 14 | # submit_job \ 15 | # --image=${TRAINING_IMAGE_PATH} \ 16 | # --gpu 8 \ 17 | # --tasks_per_node 8 \ 18 | # --nodes ${NODES} \ 19 | # -n prepare_data \ 20 | # --logroot ${LOG_DIR} \ 21 | # --email_mode never \ 22 | # --duration 0 \ 23 | # --cpu 128 \ 24 | # --dependent_clones 3 \ 25 | # --partition adlr_services \ 26 | # -c "bash shell/eagle_abc/prepare.sh ${RECIPE_PATH} ${NODES} ${LOG_DIR} ${TOKENIZER} ${LAUNCHER}" 27 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/_default_template_mmbench_en_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMBench 2 | dataset_kwargs: 3 | token: True 4 | doc_to_target: "answer" 5 | model_specific_prompt_kwargs: 6 | default: 7 | pre_prompt: "" 8 | post_prompt: "\nAnswer with the option's letter from the given choices directly."
9 | doc_to_visual: !function en_utils.mmbench_doc_to_visual 10 | doc_to_text: !function en_utils.mmbench_doc_to_text 11 | doc_to_target: "answer" 12 | process_results: !function en_utils.mmbench_process_results 13 | model_specific_generation_kwargs: 14 | llava: 15 | image_aspect_ratio: original 16 | output_type: generate_until 17 | dataset_name: "en" 18 | generation_kwargs: 19 | until: 20 | - "ASSISTANT:" 21 | max_new_tokens: 1024 22 | temperature: 0 23 | top_p: 0 24 | num_beams: 1 25 | do_sample: false 26 | -------------------------------------------------------------------------------- /Eagle2_5/deepspeed_configs/zero_stage1_config_nooptim.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 1, 4 | "allgather_partitions": true, 5 | "allgather_bucket_size": 1e9, 6 | "overlap_comm": true, 7 | "reduce_scatter": true, 8 | "reduce_bucket_size": 1e9, 9 | "contiguous_gradients": true 10 | }, 11 | "fp16": { 12 | "enabled": "auto", 13 | "auto_cast": true, 14 | "loss_scale": 0, 15 | "initial_scale_power": 32, 16 | "loss_scale_window": 1000, 17 | "hysteresis": 2, 18 | "min_loss_scale": 1 19 | }, 20 | "bf16": { 21 | "enabled": "auto" 22 | }, 23 | "gradient_accumulation_steps": "auto", 24 | "gradient_clipping": "auto", 25 | "steps_per_print": 2000, 26 | "train_batch_size": "auto", 27 | "train_micro_batch_size_per_gpu": "auto", 28 | "wall_clock_breakdown": true 29 | } -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/olympiadbench/olympiadbench_test_cn.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/OlympiadBench 2 | dataset_kwargs: 3 | token: True 4 | task : "olympiadbench_test_cn" 5 | test_split: test_cn 6 | output_type: generate_until 7 | doc_to_visual: !function cn_utils.olympiadbench_doc_to_visual 8 | doc_to_text: !function cn_utils.olympiadbench_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function cn_utils.olympiadbench_process_results 19 | metric_list: 20 | - metric: submission 21 | aggregation: !function cn_utils.olympiadbench_aggregate_results 22 | higher_is_better: true 23 | - metric: exact_match 24 | aggregation: mean 25 | higher_is_better: true -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/olympiadbench/olympiadbench_test_en.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/OlympiadBench 2 | dataset_kwargs: 3 | token: True 4 | task : "olympiadbench_test_en" 5 | test_split: test_en 6 | output_type: generate_until 7 | doc_to_visual: !function en_utils.olympiadbench_doc_to_visual 8 | doc_to_text: !function en_utils.olympiadbench_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function en_utils.olympiadbench_process_results 19 | metric_list: 20 | - metric: submission 21 | aggregation: !function en_utils.olympiadbench_aggregate_results 22 | higher_is_better: true 23 | - metric: exact_match 24 | aggregation: mean 25 | higher_is_better: true -------------------------------------------------------------------------------- 
/Eagle/lmms_eval/tasks/coco_cap/coco2017_cap_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/COCO-Caption2017 2 | dataset_kwargs: 3 | token: True 4 | task : "coco2017_cap_test" 5 | group : "coco_caption2017" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.coco_doc_to_visual 9 | doc_to_text: !function utils.coco_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 128 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.coco_test_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: coco_passthrough 21 | aggregation : !function utils.coco_test_aggregation_result 22 | higher_is_better : true 23 | metadata: 24 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/multidocvqa/multidocvqa_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MP-DocVQA 2 | task: "multidocvqa_val" 3 | test_split: val 4 | output_type: generate_until 5 | doc_to_visual: !function utils.multidocvqa_doc_to_visual 6 | doc_to_text: !function utils.multidocvqa_doc_to_text 7 | doc_to_target: "answers" 8 | generation_kwargs: 9 | max_new_tokens: 32 10 | temperature: 0 11 | do_sample: False 12 | process_results: !function utils.multidocvqa_process_results 13 | metric_list: 14 | - metric: anls 15 | aggregation: !function utils.multidocvqa_aggregate_results_anls 16 | higher_is_better: true 17 | - metric: accuracy 18 | aggregation: !function utils.multidocvqa_aggregate_results_accuracy 19 | higher_is_better: true 20 | model_specific_prompt_kwargs: 21 | default: 22 | pre_prompt: "" 23 | post_prompt: "\nAnswer the question using a single word or phrase." 24 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/gqa/gqa.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/GQA 2 | dataset_name: testdev_balanced_instructions 3 | dataset_kwargs: 4 | token: True 5 | task: "gqa" 6 | test_split: testdev 7 | output_type: generate_until 8 | doc_to_visual: !function utils.gqa_doc_to_visual 9 | doc_to_text: !function utils.gqa_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | metric_list: 18 | - metric: exact_match 19 | aggregation: mean 20 | higher_is_better: true 21 | ignore_case: true 22 | ignore_punctuation: true 23 | metadata: 24 | - version: 0.0 25 | 26 | model_specific_prompt_kwargs: 27 | default: 28 | pre_prompt: "" 29 | post_prompt: "\nAnswer the question using a single word or phrase." 
30 | qwen_vl: 31 | pre_prompt: "" 32 | post_prompt: " Answer:" -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2014_cap_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/COCO-Caption 2 | dataset_kwargs: 3 | token: True 4 | task : "coco2014_cap_test" 5 | group : "coco_caption" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.coco_doc_to_visual 9 | doc_to_text: "Provide a one-sentence caption for the provided image." 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 128 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.coco_test_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: coco_passthrough 21 | aggregation : !function utils.coco_test_aggregation_result 22 | higher_is_better : true 23 | metadata: 24 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/gqa/utils.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | GQA_RAW_IMAGE_DATASET = None 4 | GQA_ID2IMAGE = None 5 | 6 | 7 | def gqa_doc_to_visual(doc): 8 | global GQA_RAW_IMAGE_DATASET 9 | global GQA_ID2IMAGE 10 | if GQA_RAW_IMAGE_DATASET is None: 11 | GQA_RAW_IMAGE_DATASET = load_dataset("lmms-lab/GQA", "testdev_balanced_images", split="testdev", token=True) 12 | GQA_ID2IMAGE = {} 13 | for row in GQA_RAW_IMAGE_DATASET: 14 | GQA_ID2IMAGE[row["id"]] = row["image"].convert("RGB") 15 | image = GQA_ID2IMAGE[doc["imageId"]] 16 | return [image] 17 | 18 | 19 | def gqa_doc_to_text(doc, model_specific_prompt_kwargs): 20 | question = doc["question"] 21 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 22 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 23 | return f"{pre_prompt}{question}{post_prompt}" 24 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/_default_template_vqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/OK-VQA 2 | output_type: generate_until 3 | doc_to_visual: !function utils.ok_vqa_doc_to_visual 4 | doc_to_text: !function utils.ok_vqa_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | metric_list: 10 | - metric: exact_match 11 | aggregation: mean 12 | higher_is_better: true 13 | ignore_case: true 14 | ignore_punctuation: true 15 | - metric: submission 16 | aggregation: !function utils.ok_vqa_aggreate_submissions 17 | higher_is_better: true 18 | process_results: !function utils.ok_vqa_process_results 19 | model_specific_prompt_kwargs: 20 | default: 21 | pre_prompt: "" 22 | post_prompt: "\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase." 
23 | metadata: 24 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/train/constants.py: -------------------------------------------------------------------------------- 1 | IMG_CONTEXT_TOKEN = '' 2 | IMG_START_TOKEN = '' 3 | IMG_END_TOKEN = '' 4 | QUAD_START_TOKEN = '' 5 | QUAD_END_TOKEN = '' 6 | REF_START_TOKEN = '' 7 | REF_END_TOKEN = '' 8 | BOX_START_TOKEN = '' 9 | BOX_END_TOKEN = '' 10 | INTERVAL_START_TOKEN = '' 11 | INTERVAL_END_TOKEN = '' 12 | IMAGENET_MEAN = (0.485, 0.456, 0.406) 13 | IMAGENET_STD = (0.229, 0.224, 0.225) 14 | CLIP_MEAN = (0.4814546, 0.4578275, 0.40821073) 15 | CLIP_STD = (0.2686295, 0.2613025, 0.2757711) 16 | SIGLIP_MEAN = (0.5, 0.5, 0.5) 17 | SIGLIP_STD = (0.5, 0.5, 0.5) 18 | 19 | special_tokens_list = [ 20 | IMG_CONTEXT_TOKEN, 21 | IMG_START_TOKEN, IMG_END_TOKEN, 22 | BOX_START_TOKEN, BOX_END_TOKEN, 23 | QUAD_START_TOKEN, QUAD_END_TOKEN, 24 | REF_START_TOKEN, REF_END_TOKEN, 25 | INTERVAL_START_TOKEN, INTERVAL_END_TOKEN, 26 | ] 27 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/nocaps/nocaps_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/NoCaps 2 | dataset_kwargs: 3 | token: True 4 | task : "nocaps_test" 5 | group : "nocaps_caption" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.nocaps_doc_to_visual 9 | doc_to_text: !function utils.nocaps_doc_to_text 10 | doc_to_target: "annotations_captions" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.nocaps_test_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: nocaps_passthrough 21 | aggregation : !function utils.nocaps_test_aggregation_result 22 | higher_is_better : true 23 | metadata: 24 | - version: 0.0 25 | include: _default_template_nocaps_yaml -------------------------------------------------------------------------------- /Eagle/scripts/zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "train_micro_batch_size_per_gpu": "auto", 14 | "train_batch_size": "auto", 15 | "gradient_accumulation_steps": "auto", 16 | "zero_optimization": { 17 | "stage": 3, 18 | "overlap_comm": true, 19 | "contiguous_gradients": true, 20 | "sub_group_size": 1e9, 21 | "reduce_bucket_size": "auto", 22 | "stage3_prefetch_bucket_size": "auto", 23 | "stage3_param_persistence_threshold": "auto", 24 | "stage3_max_live_parameters": 1e9, 25 | "stage3_max_reuse_distance": 1e9, 26 | "stage3_gather_16bit_weights_on_model_save": true 27 | } 28 | } -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mathvista/mathvista_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: AI4Math/MathVista 2 | dataset_kwargs: 3 | token: True 4 | task: "mathvista_test" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.mathvista_doc_to_visual 8 | doc_to_text: !function 
utils.mathvista_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function utils.mathvista_process_results 19 | metric_list: 20 | - metric: submission 21 | aggregation: !function utils.mathvista_aggregate_results 22 | higher_is_better: true 23 | 24 | model_specific_prompt_kwargs: 25 | default: 26 | shot_type: "format-prompt" # can be "reason-first", "solution", "step-by-step" 27 | model_specific_generation_kwargs: 28 | llava: 29 | image_aspect_ratio: original -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/textcaps_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/TextCaps 2 | dataset_kwargs: 3 | token: True 4 | task : "textcaps_test" 5 | group : "textcaps_caption" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.textcaps_doc_to_visual 9 | doc_to_text: !function utils.textcaps_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.textcaps_test_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: textcaps_passthrough 21 | aggregation : !function utils.textcaps_test_aggregation_result 22 | higher_is_better : true 23 | metadata: 24 | - version: 0.0 25 | include: _default_template_textcaps_yaml -------------------------------------------------------------------------------- /Eagle/scripts/eval/mme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | MME_DATA_ROOT=$(readlink -f "./playground/data/eval/MME") 5 | 6 | python -m eagle.eval.model_vqa_loader \ 7 | --model-path $CKPT \ 8 | --question-file ./playground/data/eval/MME/llava_mme.jsonl \ 9 | --image-folder ./playground/data/eval/MME/MME_Benchmark_release_version \ 10 | --answers-file ./playground/data/eval/MME/answers/${NAME}.jsonl \ 11 | --temperature 0 \ 12 | --conv-mode vicuna_v1 13 | 14 | cd ./playground/data/eval/MME 15 | 16 | # python convert_answer_to_mme.py --experiment ${NAME}.jsonl 17 | 18 | # cd eval_tool 19 | 20 | # python calculation.py --results_dir answers/${NAME} 21 | 22 | python convert_answer_to_mme.py --experiment ${MME_DATA_ROOT}/answers/${NAME}.jsonl --data_path ${MME_DATA_ROOT}/MME_Benchmark_release_version 23 | 24 | cd eval_tool 25 | 26 | python calculation.py --results_dir ${MME_DATA_ROOT}/answers/${NAME}_mme_results 27 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/_default_template_docvqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ICON-QA 2 | dataset_kwargs: 3 | token: True 4 | output_type: generate_until 5 | doc_to_visual: !function utils.doc_to_visual 6 | doc_to_text: !function utils.doc_to_text 7 | doc_to_target: "answers" 8 | # process_results: !function utils.test_process_results 9 | generation_kwargs: 10 | max_new_tokens: 32 11 | temperature: 0 12 | do_sample: False 13 | model_specific_prompt_kwargs: 14 | default: 15 | pre_prompt: "" 16 | statement: "Given a set of images and a 
question, please provide the answer to the question.\n" 17 | options_statement: "Question: {question}.\nOptions:\n{options}\nPlease answer with the option letter from the given choices directly." 18 | freeform_statement: "Question: {question}.\nPlease answer the question using a single word or phrase." 19 | metric_list: 20 | - metric: anls 21 | aggregation: mean 22 | higher_is_better: true -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/sp_utils/ring/__init__.py: -------------------------------------------------------------------------------- 1 | # Adopted from https://github.com/zhuzilin/ring-flash-attention. 2 | # Implementation refers to Ring Attention Paper: https://arxiv.org/abs/2310.01889 3 | 4 | 5 | from .ring_flash_attn import ring_flash_attn_func, ring_flash_attn_kvpacked_func, ring_flash_attn_qkvpacked_func 6 | from .ring_flash_attn_varlen import ( 7 | ring_flash_attn_varlen_func, 8 | ring_flash_attn_varlen_kvpacked_func, 9 | ring_flash_attn_varlen_qkvpacked_func, 10 | ) 11 | from .stripe_flash_attn import stripe_flash_attn_func, stripe_flash_attn_kvpacked_func, stripe_flash_attn_qkvpacked_func 12 | from .zigzag_ring_flash_attn import ( 13 | zigzag_ring_flash_attn_func, 14 | zigzag_ring_flash_attn_kvpacked_func, 15 | zigzag_ring_flash_attn_qkvpacked_func, 16 | ) 17 | from .zigzag_ring_flash_attn_varlen import ( 18 | zigzag_ring_flash_attn_varlen_func, 19 | zigzag_ring_flash_attn_varlen_qkvpacked_func, 20 | ) 21 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mathvista/mathvista_testmini.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: AI4Math/MathVista 2 | dataset_kwargs: 3 | token: True 4 | task: "mathvista_testmini" 5 | test_split: testmini 6 | output_type: generate_until 7 | doc_to_visual: !function utils.mathvista_doc_to_visual 8 | doc_to_text: !function utils.mathvista_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function utils.mathvista_process_results 19 | metric_list: 20 | - metric: gpt_eval_score 21 | aggregation: !function utils.mathvista_aggregate_results 22 | higher_is_better: true 23 | 24 | model_specific_prompt_kwargs: 25 | default: 26 | shot_type: "format-prompt" # can be "reason-first", "solution", "step-by-step" 27 | model_specific_generation_kwargs: 28 | llava: 29 | image_aspect_ratio: original -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | splits = ["val2014"] 5 | tasks = ["vqa"] 6 | 7 | if __name__ == "__main__": 8 | dump_tasks = [] 9 | for task in tasks: 10 | for split in splits: 11 | yaml_dict = {"group": f"ok_vqa", "task": f"ok_vqa_{split}", "include": f"_default_template_{task}_yaml", "test_split": split} 12 | if split == "train": 13 | yaml_dict.pop("group") 14 | else: 15 | dump_tasks.append(f"ok_vqa_{split}") 16 | 17 | save_path = f"./ok_vqa_{split}.yaml" 18 | print(f"Saving to {save_path}") 19 | with open(save_path, "w") as f: 20 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 21 | 22 | group_dict = {"group": "ok_vqa", "task": dump_tasks} 23 | 24 | with open("./_ok_vqa.yaml", "w") as f: 25 | 
yaml.dump(group_dict, f, default_flow_style=False, indent=4) 26 | -------------------------------------------------------------------------------- /Eagle/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.3.1 2 | torchvision 3 | transformers==4.44.2 4 | tokenizers==0.19.1 5 | sentencepiece==0.2.0 6 | shortuuid 7 | accelerate==0.34.2 8 | peft 9 | bitsandbytes 10 | pydantic 11 | markdown2[all] 12 | numpy 13 | scikit-learn==1.2.2 14 | gradio==4.16.0 15 | gradio_client==0.8.1 16 | requests 17 | httpx==0.27.0 18 | uvicorn 19 | protobuf==3.20.0 20 | fastapi 21 | einops==0.6.1 22 | einops-exts==0.0.4 23 | timm==0.9.11 24 | opencv-python 25 | fvcore 26 | # these lib are required for the lmms-eval 27 | black==24.1.0 28 | datasets==2.16.1 29 | evaluate>=0.4.0 30 | jsonlines 31 | numexpr 32 | pybind11>=2.6.2 33 | pytablewriter 34 | rouge-score>=0.0.4 35 | sacrebleu>=1.5.0 36 | scikit-learn>=0.24.1 37 | sqlitedict 38 | openai>=1.0.0 39 | pycocoevalcap 40 | tqdm-multiprocess 41 | zstandard 42 | pyyaml 43 | sympy 44 | mpmath 45 | Jinja2 46 | openpyxl 47 | Levenshtein 48 | hf_transfer 49 | tenacity 50 | transformers-stream-generator 51 | tiktoken 52 | pre-commit 53 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/stvqa/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | 5 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 6 | 7 | 8 | def stvqa_doc_to_text(doc, model_specific_prompt_kwargs): 9 | question = doc["question"] 10 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 11 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 12 | return f"{pre_prompt}{question}{post_prompt}" 13 | 14 | 15 | def stvqa_doc_to_visual(doc): 16 | return [doc["image"].convert("RGB")] 17 | 18 | 19 | def stvqa_process_results(doc, results): 20 | answer = results[0] 21 | return {"submission": {"question_id": int(doc["question_id"]), "answer": answer}} 22 | 23 | 24 | def stvqa_aggregate_submissions(results, args): 25 | file = generate_submission_file("stvqa_test_for_submission.json", args) 26 | with open(file, "w") as f: 27 | json.dump(results, f) 28 | logging.getLogger("lmms-eval").info(f"Results saved to {file}") 29 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | splits = ["val", "test"] 5 | tasks = ["vqa"] 6 | 7 | if __name__ == "__main__": 8 | dump_tasks = [] 9 | for task in tasks: 10 | for split in splits: 11 | yaml_dict = {"group": f"vizwiz_{task}", "task": f"vizwiz_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split} 12 | if split == "train": 13 | yaml_dict.pop("group") 14 | else: 15 | dump_tasks.append(f"vizwiz_{task}_{split}") 16 | 17 | save_path = f"./vizwiz_{task}_{split}.yaml" 18 | print(f"Saving to {save_path}") 19 | with open(save_path, "w") as f: 20 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 21 | 22 | group_dict = {"group": "vizwiz_vqa", "task": dump_tasks} 23 | 24 | with open("./_vizwiz_vqa.yaml", "w") as f: 25 | yaml.dump(group_dict, f, default_flow_style=False, indent=4) 26 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/chartqa/chartqa.yaml: 
-------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ChartQA 2 | dataset_kwargs: 3 | token: True 4 | task: "chartqa" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.chartqa_doc_to_visual 8 | doc_to_text: !function utils.chartqa_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 16 12 | temperature: 0 13 | do_sample: False 14 | process_results: !function utils.chartqa_process_results 15 | metric_list: 16 | - metric: relaxed_overall 17 | aggregation: mean 18 | higher_is_better: true 19 | - metric: relaxed_human_split 20 | aggregation: mean 21 | higher_is_better: true 22 | - metric: relaxed_augmented_split 23 | aggregation: mean 24 | higher_is_better: true 25 | metadata: 26 | - version: 0.0 27 | model_specific_prompt_kwargs: 28 | default: 29 | pre_prompt: "" 30 | post_prompt: "\nAnswer the question with a single word." 31 | qwen_vl: 32 | pre_prompt: "" 33 | post_prompt: " Answer:" 34 | 35 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ai2d/ai2d.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ai2d 2 | task: "ai2d" 3 | dataset_kwargs: 4 | token: True 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.ai2d_doc_to_visual 8 | doc_to_text: !function utils.ai2d_doc_to_text 9 | doc_to_target: !function utils.ai2d_doc_to_target 10 | generation_kwargs: 11 | max_new_tokens: 16 12 | temperature: 0 13 | do_sample: False 14 | metric_list: 15 | - metric: exact_match 16 | aggregation: mean 17 | higher_is_better: true 18 | ignore_case: true 19 | ignore_punctuation: true 20 | metadata: 21 | - version: 0.0 22 | 23 | model_specific_prompt_kwargs: 24 | default: 25 | prompt_format: mcq 26 | pre_prompt: "" 27 | post_prompt: "\nAnswer with the option's letter from the given choices directly." 
28 | # qwen formulate ai2d as question answering instead of mcq 29 | qwen_vl: 30 | prompt_format: qa 31 | pre_prompt: "" 32 | post_prompt: " Answer:" 33 | 34 | model_specific_target_kwargs: 35 | default: "mcq" 36 | qwen_vl: "qa" -------------------------------------------------------------------------------- /Eagle/lmms_eval/api/instance.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Literal, Tuple 3 | 4 | 5 | @dataclass 6 | class Instance: 7 | request_type: Literal["loglikelihood", "generate_until"] 8 | arguments: tuple 9 | idx: int 10 | metadata: Tuple[str, int, int] = field(default_factory=lambda: (None, None, None)) # TODO: better typehints here 11 | resps: list = field(default_factory=list) 12 | filtered_resps: dict = field(default_factory=dict) 13 | 14 | # initialized after init 15 | task_name: str = None 16 | doc_id: str = None 17 | repeats: str = None 18 | doc: dict = None 19 | 20 | def __post_init__(self) -> None: 21 | # unpack metadata field 22 | self.task_name, self.doc_id, self.repeats = self.metadata 23 | 24 | @property 25 | def args(self): 26 | """ 27 | Returns (string,) where `string` is the string to calculate loglikelihood over 28 | """ 29 | return self.arguments if isinstance(self.arguments, tuple) else (self.arguments,) 30 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/scienceqa/scienceqa.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ScienceQA 2 | dataset_name: ScienceQA-FULL 3 | task: "scienceqa" 4 | dataset_kwargs: 5 | token: True 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.sqa_doc_to_visual 9 | doc_to_text: !function utils.sqa_doc_to_text 10 | doc_to_target: !function utils.sqa_doc_to_target 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | temperature: 0 14 | do_sample: False 15 | metric_list: 16 | - metric: exact_match 17 | aggregation: mean 18 | higher_is_better: true 19 | ignore_case: true 20 | ignore_punctuation: true 21 | process_results: !function utils.sqa_process_results 22 | metadata: 23 | - version: 0.0 24 | 25 | model_specific_prompt_kwargs: 26 | default: 27 | format: default 28 | pre_prompt: "" 29 | post_prompt: "\nAnswer with the option's letter from the given choices directly." 
30 | qwen_vl: 31 | format: qwen_vl 32 | 33 | model_specific_generation_kwargs: 34 | llava: 35 | image_aspect_ratio: original 36 | 37 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/scienceqa/scienceqa_img.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ScienceQA 2 | dataset_name: ScienceQA-IMG 3 | task: "scienceqa_img" 4 | dataset_kwargs: 5 | token: True 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.sqa_doc_to_visual 9 | doc_to_text: !function utils.sqa_doc_to_text 10 | doc_to_target: !function utils.sqa_doc_to_target 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | temperature: 0 14 | do_sample: False 15 | metric_list: 16 | - metric: exact_match 17 | aggregation: mean 18 | higher_is_better: true 19 | ignore_case: true 20 | ignore_punctuation: true 21 | process_results: !function utils.sqa_process_results 22 | metadata: 23 | - version: 0.0 24 | 25 | model_specific_prompt_kwargs: 26 | default: 27 | format: default 28 | pre_prompt: "" 29 | post_prompt: "\nAnswer with the option's letter from the given choices directly." 30 | qwen_vl: 31 | format: qwen_vl 32 | model_specific_generation_kwargs: 33 | llava: 34 | image_aspect_ratio: original 35 | 36 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | # splits = ["train", "test", "val"] 5 | splits = ["test", "val"] 6 | tasks = ["seg", "bbox"] 7 | 8 | if __name__ == "__main__": 9 | dump_tasks = [] 10 | for task in tasks: 11 | for split in splits: 12 | yaml_dict = {"group": f"refcocog_{task}", "task": f"refcocog_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split} 13 | if split == "train": 14 | yaml_dict.pop("group") 15 | else: 16 | dump_tasks.append(f"refcoco_{task}_{split}") 17 | 18 | save_path = f"./refcocog_{task}_{split}.yaml" 19 | print(f"Saving to {save_path}") 20 | with open(save_path, "w") as f: 21 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 22 | 23 | group_dict = {"group": "refcocog", "task": dump_tasks} 24 | 25 | with open("./_refcoco.yaml", "w") as f: 26 | yaml.dump(group_dict, f, default_flow_style=False, indent=4) 27 | -------------------------------------------------------------------------------- /Eagle2_5/deepspeed_configs/zero_stage2_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 2, 4 | "allgather_partitions": true, 5 | "allgather_bucket_size": 1e8, 6 | "overlap_comm": true, 7 | "reduce_scatter": true, 8 | "reduce_bucket_size": 1e8, 9 | "contiguous_gradients": true 10 | }, 11 | "fp16": { 12 | "enabled": "auto", 13 | "auto_cast": true, 14 | "loss_scale": 0, 15 | "initial_scale_power": 32, 16 | "loss_scale_window": 1000, 17 | "hysteresis": 2, 18 | "min_loss_scale": 1 19 | }, 20 | "bf16": { 21 | "enabled": "auto" 22 | }, 23 | "optimizer": { 24 | "type": "AdamW", 25 | "params": { 26 | "lr": "auto", 27 | "betas": [ 28 | 0.9, 29 | 0.999 30 | ], 31 | "eps": 1e-8, 32 | "weight_decay": "auto" 33 | } 34 | }, 35 | "gradient_accumulation_steps": "auto", 36 | "gradient_clipping": "auto", 37 | "steps_per_print": 2000, 38 | "train_batch_size": "auto", 39 | "train_micro_batch_size_per_gpu": "auto", 40 | "wall_clock_breakdown": false 41 
| } 42 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench/seedbench.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/SEED-Bench 2 | dataset_kwargs: 3 | token: True 4 | task: "seedbench" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.seed_doc_to_visual 8 | doc_to_text: !function utils.seed_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | image_aspect_ratio: original 14 | # The return value of process_results will be used by metrics 15 | process_results: !function utils.seed_process_result 16 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 17 | metric_list: 18 | - metric: seed_image 19 | aggregation: !function utils.seed_aggregation_result 20 | higher_is_better: true 21 | - metric: seed_video 22 | aggregation: !function utils.seed_aggregation_result 23 | higher_is_better: true 24 | - metric: seed_all 25 | aggregation: !function utils.seed_aggregation_result 26 | higher_is_better: true 27 | metadata: 28 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | # splits = ["train", "val", "testA", "testB"] 5 | splits = ["val", "testA", "testB"] 6 | tasks = ["seg", "bbox"] 7 | 8 | if __name__ == "__main__": 9 | dump_tasks = [] 10 | for task in tasks: 11 | for split in splits: 12 | yaml_dict = {"group": f"refcoco+_{task}", "task": f"refcoco+_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split} 13 | if split == "train": 14 | yaml_dict.pop("group") 15 | else: 16 | dump_tasks.append(f"refcoco_{task}_{split}") 17 | 18 | save_path = f"./refcoco+_{task}_{split}.yaml" 19 | print(f"Saving to {save_path}") 20 | with open(save_path, "w") as f: 21 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 22 | 23 | group_dict = {"group": "refcoco+", "task": dump_tasks} 24 | 25 | with open("./_refcoco.yaml", "w") as f: 26 | yaml.dump(group_dict, f, default_flow_style=False, indent=4) 27 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | # splits = ["train", "test", "val", "testA", "testB"] 5 | splits = ["test", "val", "testA", "testB"] 6 | tasks = ["seg", "bbox"] 7 | 8 | if __name__ == "__main__": 9 | dump_tasks = [] 10 | for task in tasks: 11 | for split in splits: 12 | yaml_dict = {"group": f"refcoco_{task}", "task": f"refcoco_{task}_{split}", "test_split": split, "include": f"_default_template_{task}_yaml"} 13 | if split == "train": 14 | yaml_dict.pop("group") 15 | else: 16 | dump_tasks.append(f"refcoco_{task}_{split}") 17 | 18 | save_path = f"./refcoco_{task}_{split}.yaml" 19 | print(f"Saving to {save_path}") 20 | with open(save_path, "w") as f: 21 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 22 | 23 | group_dict = {"group": "refcoco", "task": dump_tasks} 24 | 25 | with open("./_refcoco.yaml", "w") as f: 26 | yaml.dump(group_dict, f, default_flow_style=False, indent=4) 27 | 
-------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_cc.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMBench 2 | dataset_name: cc 3 | dataset_kwargs: 4 | token: True 5 | task: "mmbench_cn_cc" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function cc_utils.mmbench_doc_to_visual 9 | doc_to_text: !function cc_utils.mmbench_cn_cc_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 256 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function cc_utils.mmbench_cn_cc_process_results 18 | metric_list: 19 | - metric: gpt_eval_score 20 | aggregation: !function cc_utils.mmbench_cn_cc_aggregate_dev_results_eval 21 | higher_is_better: true 22 | - metric: submission 23 | aggregation: !function cc_utils.mmbench_cn_cc_aggregate_results 24 | metadata: 25 | version: 0.0 26 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 27 | 28 | model_specific_prompt_kwargs: 29 | default: 30 | pre_prompt: "" 31 | post_prompt: "\n请直接使用所提供的选项字母作为答案回答。" 32 | model_specific_generation_kwargs: 33 | llava: 34 | image_aspect_ratio: original -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmvet/mmvet.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMVet 2 | dataset_kwargs: 3 | token: True 4 | task: "mmvet" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.mmvet_doc_to_visual 8 | doc_to_text: !function utils.doc_to_text # Such that {{question}} will be replaced by doc["question"] 9 | doc_to_target: "{{answer}}" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function utils.mmvet_process_results # apply gpt eval here 19 | metric_list: 20 | - metric: gpt_eval_score 21 | aggregation: !function utils.mmvet_aggregate_results 22 | higher_is_better: true 23 | metadata: 24 | version: 0.0 25 | # gpt_eval_model_name: "gpt-4-0613" 26 | # gpt_eval_model_name: "gpt-4-0125-preview" 27 | # gpt_eval_model_name: "gpt-4o-2024-05-13" 28 | gpt_eval_model_name: "gpt-4" 29 | # gpt_eval_model_name: "gpt-3.5-turbo-0613" 30 | model_specific_prompt_kwargs: 31 | default: 32 | pre_prompt: "" 33 | post_prompt: "" 34 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import logging 4 | 5 | 6 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 7 | 8 | lmms_logger = logging.getLogger("lmms-eval") 9 | 10 | 11 | def infovqa_doc_to_visual(doc): 12 | return [doc["image"].convert("RGB")] 13 | 14 | 15 | def infovqa_doc_to_text(doc, model_specific_prompt_kwargs): 16 | question = doc["question"] 17 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 18 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 19 | return f"{pre_prompt}{question}{post_prompt}" 20 | 21 | 22 | def infovqa_test_process_results(doc, results): 23 | pred = results[0] 24 | questionId = doc["questionId"] 25 | return {"submission": {"questionId": int(questionId), "answer": pred}} 26 | 27 | 28 | def infovqa_test_aggregate_results(results, args): 29 | # save 
results as json 30 | file = generate_submission_file("infovqa_test_for_submission.json", args) 31 | with open(file, "w") as f: 32 | json.dump(results, f) 33 | lmms_logger.info(f"Results saved to {file}") 34 | -------------------------------------------------------------------------------- /Eagle/scripts/convert_mmbench_for_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | import pandas as pd 5 | 6 | def get_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--annotation-file", type=str, required=True) 9 | parser.add_argument("--result-dir", type=str, required=True) 10 | parser.add_argument("--upload-dir", type=str, required=True) 11 | parser.add_argument("--experiment", type=str, required=True) 12 | 13 | return parser.parse_args() 14 | 15 | if __name__ == "__main__": 16 | args = get_args() 17 | 18 | df = pd.read_table(args.annotation_file) 19 | 20 | cur_df = df.copy() 21 | cur_df = cur_df.drop(columns=['hint', 'category', 'source', 'image', 'comment', 'l2-category']) 22 | cur_df.insert(6, 'prediction', None) 23 | for pred in open(os.path.join(args.result_dir, f"{args.experiment}.jsonl")): 24 | pred = json.loads(pred) 25 | cur_df.loc[df['index'] == pred['question_id'], 'prediction'] = pred['text'] 26 | 27 | cur_df.to_excel(os.path.join(args.upload_dir, f"{args.experiment}.xlsx"), index=False, engine='openpyxl') 28 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/pope/pope.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/POPE 2 | dataset_kwargs: 3 | token: True 4 | task: "pope" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.pope_doc_to_visual 8 | doc_to_text: !function utils.pope_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 128 12 | temperature: 0 13 | top_p: 0 14 | num_beams: 1 15 | do_sample: false 16 | process_results: !function utils.pope_process_results 17 | metric_list: 18 | - metric: pope_accuracy 19 | aggregation: !function utils.pope_aggregate_accuracy 20 | higher_is_better: true 21 | - metric: pope_precision 22 | aggregation: !function utils.pope_aggregate_precision 23 | higher_is_better: true 24 | - metric: pope_recall 25 | aggregation: !function utils.pope_aggregate_recall 26 | higher_is_better: true 27 | - metric: pope_f1_score 28 | aggregation: !function utils.pope_aggregate_f1_score 29 | higher_is_better: true 30 | - metric: pope_yes_ratio 31 | aggregation: !function utils.pope_aggregate_yes_ratio 32 | higher_is_better: true 33 | metadata: 34 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import logging 4 | 5 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 6 | 7 | logger = logging.getLogger("lmms-eval") 8 | 9 | 10 | def docvqa_doc_to_visual(doc): 11 | return [doc["image"].convert("RGB")] 12 | 13 | 14 | def docvqa_doc_to_text(doc, model_specific_prompt_kwargs): 15 | question = doc["question"] 16 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 17 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 18 | return f"{pre_prompt}{question}{post_prompt}" 19 | 20 | 21 | def docvqa_test_process_results(doc, results): 22 
| pred = results[0] 23 | questionId = doc["questionId"] 24 | return {"anls": {"questionId": int(questionId), "answer": pred}, "submission": {"questionId": int(questionId), "answer": pred}} 25 | 26 | 27 | def docvqa_test_aggregate_results(results, args): 28 | # save results as json 29 | path = generate_submission_file("docvqa_test_for_submission.json", args) 30 | with open(path, "w") as f: 31 | json.dump(results, f) 32 | logger.info(f"Results saved to {path}") 33 | -------------------------------------------------------------------------------- /Eagle2_5/deepspeed_configs/zero_stage1_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 1, 4 | "allgather_partitions": true, 5 | "allgather_bucket_size": 1e9, 6 | "overlap_comm": true, 7 | "reduce_scatter": true, 8 | "reduce_bucket_size": 1e9, 9 | "contiguous_gradients": true 10 | }, 11 | "fp16": { 12 | "enabled": "auto", 13 | "auto_cast": true, 14 | "loss_scale": 0, 15 | "initial_scale_power": 32, 16 | "loss_scale_window": 1000, 17 | "hysteresis": 2, 18 | "min_loss_scale": 1 19 | }, 20 | "bf16": { 21 | "enabled": "auto" 22 | }, 23 | "optimizer": { 24 | "type": "AdamW", 25 | "params": { 26 | "lr": "auto", 27 | "betas": [ 28 | 0.9, 29 | 0.999 30 | ], 31 | "eps": 1e-8, 32 | "weight_decay": "auto" 33 | } 34 | }, 35 | "gradient_accumulation_steps": "auto", 36 | "gradient_clipping": "auto", 37 | "steps_per_print": 2000, 38 | "train_batch_size": "auto", 39 | "train_micro_batch_size_per_gpu": "auto", 40 | "wall_clock_breakdown": true 41 | } 42 | -------------------------------------------------------------------------------- /Eagle2_5/deployment/setup_x86.dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:25.06-py3 2 | 3 | RUN chmod 1777 /tmp 4 | # RUN apt update && apt install -y libturbojpeg libsm6 libxext6 -y 5 | # RUN apt install libgl1-mesa-glx libsm6 libxext6 -y 6 | 7 | # RUN pip install setuptools==65.5.1 debugpy einops tqdm numpy pandas 8 | # RUN pip install llvmlite==0.41.0 9 | # RUN pip install numba==0.58.0 scikit-image==0.18.3 "matplotlib<3.6.0" 10 | 11 | RUN pip install onnx onnxsim onnxruntime onnx_graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com 12 | RUN pip install pycuda fvcore timm peft liger_kernel 13 | RUN pip install transformers==4.51.0 accelerate 14 | RUN pip install qwen-vl-utils[decord]==0.0.8 15 | RUN FLASH_ATTENTION_FORCE_BUILD=TRUE MAX_JOBS=16 pip install flash-attn 16 | RUN pip install dill==0.3.7 17 | RUN pip install mpi4py 18 | 19 | # Update cmake version from 3.24 to 3.27 20 | RUN apt update; \ 21 | apt install -y build-essential libssl-dev; \ 22 | wget https://github.com/Kitware/CMake/releases/download/v3.27.6/cmake-3.27.6.tar.gz; \ 23 | tar xf cmake-3.27.6.tar.gz; \ 24 | cd cmake-3.27.6; \ 25 | ./bootstrap; \ 26 | make -j$(nproc); \ 27 | make install 28 | 29 | RUN apt install ninja-build 30 | -------------------------------------------------------------------------------- /Eagle2_5/deepspeed_configs/zero_stage3_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 3, 4 | "overlap_comm": true, 5 | "contiguous_gradients": true, 6 | "sub_group_size": 1e9, 7 | "reduce_bucket_size": 1e9, 8 | "stage3_prefetch_bucket_size": 1e9, 9 | "stage3_param_persistence_threshold": 1e7, 10 | "stage3_max_live_parameters": 1e9, 11 | "stage3_max_reuse_distance": 1e9, 12 | 
"stage3_gather_16bit_weights_on_model_save": true 13 | }, 14 | "fp16": { 15 | "enabled": "auto", 16 | "auto_cast": true, 17 | "loss_scale": 0, 18 | "initial_scale_power": 32, 19 | "loss_scale_window": 1000, 20 | "hysteresis": 2, 21 | "min_loss_scale": 1 22 | }, 23 | "bf16": { 24 | "enabled": "auto" 25 | }, 26 | "optimizer": { 27 | "type": "AdamW", 28 | "params": { 29 | "lr": "auto", 30 | "betas": [ 31 | 0.9, 32 | 0.999 33 | ], 34 | "eps": 1e-8, 35 | "weight_decay": "auto" 36 | } 37 | }, 38 | "gradient_accumulation_steps": "auto", 39 | "gradient_clipping": "auto", 40 | "steps_per_print": 2000, 41 | "train_batch_size": "auto", 42 | "train_micro_batch_size_per_gpu": "auto", 43 | "wall_clock_breakdown": true 44 | } 45 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mme/mme.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MME 2 | dataset_kwargs: 3 | token: True 4 | task: "mme" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.mme_doc_to_visual 8 | doc_to_text: !function utils.mme_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 16 12 | temperature: 0 13 | top_p: 0 14 | num_beams: 1 15 | do_sample: false 16 | # The return value of process_results will be used by metrics 17 | process_results: !function utils.mme_process_results 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: mme_percetion_score 21 | aggregation: !function utils.mme_aggregate_results 22 | higher_is_better: true 23 | - metric: mme_cognition_score 24 | aggregation: !function utils.mme_aggregate_results 25 | higher_is_better: true 26 | model_specific_prompt_kwargs: 27 | default: 28 | pre_prompt: "" 29 | post_prompt: "\nAnswer the question using a single word or phrase." 
30 | qwen_vl: 31 | pre_prompt: "" 32 | post_prompt: " Answer:" 33 | otterhd: 34 | pre_prompt: "" 35 | post_prompt: " Answer:" 36 | metadata: 37 | - version: 0.0 38 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/llava-bench-coco/llava-bench-coco.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/llava-bench-coco 2 | dataset_kwargs: 3 | token: True 4 | task: "llava_bench_coco" 5 | test_split: train 6 | output_type: generate_until 7 | doc_to_visual: !function utils.llava_doc_to_visual 8 | doc_to_text: !function utils.llava_doc_to_text 9 | doc_to_target: "gpt_answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | image_aspect_ratio: original 14 | max_new_tokens: 1024 15 | temperature: 0 16 | top_p: 0 17 | num_beams: 1 18 | process_results: !function utils.llava_process_results 19 | metric_list: 20 | - metric: gpt_eval_llava_all 21 | aggregation: !function utils.llava_all_aggregation 22 | higher_is_better: true 23 | - metric: gpt_eval_llava_conv 24 | aggregation: !function utils.llava_conv_aggregation 25 | higher_is_better: true 26 | - metric: gpt_eval_llava_detail 27 | aggregation: !function utils.llava_detail_aggregation 28 | higher_is_better: true 29 | - metric: gpt_eval_llava_complex 30 | aggregation: !function utils.llava_complex_aggregation 31 | higher_is_better: true 32 | metadata: 33 | version: 0.0 34 | gpt_eval_model_name: "gpt-4-0314" 35 | model_specific_prompt_kwargs: 36 | default: 37 | pre_prompt: "" 38 | post_prompt: "" -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ferret/ferret.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/Ferret-Bench 2 | dataset_kwargs: 3 | token: True 4 | task: "ferret" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.ferret_doc_to_visual 8 | doc_to_text: !function utils.ferret_doc_to_text 9 | doc_to_target: "gpt_answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | image_aspect_ratio: original 14 | max_new_tokens: 1024 15 | temperature: 0 16 | top_p: 0 17 | num_beams: 1 18 | do_sample: false 19 | process_results: !function utils.ferret_process_results 20 | metric_list: 21 | - metric: gpt_eval_ferret_all 22 | aggregation: !function utils.ferret_all_aggregation 23 | higher_is_better: true 24 | - metric: gpt_eval_ferret_refer_desc 25 | aggregation: !function utils.ferret_refer_desc_aggregation 26 | higher_is_better: true 27 | - metric: gpt_eval_ferret_refer_reason 28 | aggregation: !function utils.ferret_refer_reason_aggregation 29 | higher_is_better: true 30 | - metric: gpt_eval_ferret_ground_conv 31 | aggregation: !function utils.ferret_ground_conv_aggregation 32 | higher_is_better: true 33 | metadata: 34 | version: 0.0 35 | gpt_eval_model_name: "gpt-4-0314" 36 | model_specific_prompt_kwargs: 37 | default: 38 | pre_prompt: "" 39 | post_prompt: "" -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/llava-in-the-wild/llava-in-the-wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/llava-bench-in-the-wild 2 | dataset_kwargs: 3 | token: True 4 | task: "llava_in_the_wild" 5 | test_split: train 6 | output_type: generate_until 7 | doc_to_visual: !function utils.llava_doc_to_visual 8 | doc_to_text: !function utils.llava_doc_to_text 9 | 
doc_to_target: "gpt_answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | image_aspect_ratio: original 14 | max_new_tokens: 1024 15 | temperature: 0 16 | top_p: 0 17 | num_beams: 1 18 | do_sample: false 19 | process_results: !function utils.llava_process_results 20 | metric_list: 21 | - metric: gpt_eval_llava_all 22 | aggregation: !function utils.llava_all_aggregation 23 | higher_is_better: true 24 | - metric: gpt_eval_llava_conv 25 | aggregation: !function utils.llava_conv_aggregation 26 | higher_is_better: true 27 | - metric: gpt_eval_llava_detail 28 | aggregation: !function utils.llava_detail_aggregation 29 | higher_is_better: true 30 | - metric: gpt_eval_llava_complex 31 | aggregation: !function utils.llava_complex_aggregation 32 | higher_is_better: true 33 | metadata: 34 | version: 0.0 35 | gpt_eval_model_name: "gpt-4-0613" 36 | model_specific_prompt_kwargs: 37 | default: 38 | pre_prompt: "" 39 | post_prompt: "" 40 | -------------------------------------------------------------------------------- /Eagle2_5/document/0.onboarding.md: -------------------------------------------------------------------------------- 1 | # 🦅 Onboarding Eagle 2.5 2 | 3 | This guide provides step-by-step instructions from the environment setup to the data preparation, training, and inference of the Eagle VLM. 4 | 5 | --- 6 | 7 | ### 📋 Main Steps 8 | 9 | ``` 10 | # Clone the repository 11 | git clone -b main --single-branch https://github.com/NVlabs/Eagle.git 12 | 13 | # Navigate to the working directory (Eagle 2.5) 14 | cd Eagle/Eagle2_5 15 | 16 | ``` 17 | > ⚠️ Note: All commands below should be executed within the `Eagle/Eagle2_5` directory. 18 | 19 | 20 | 1. 🦅 [Installing Eagle](./1.installing.md) 21 | Install Eagle and all necessary dependencies. 22 | 23 | 2. 📊 [Preparing Data](./2.preparing_playground.md) 24 | Prepare your dataset in the required format. 25 | 26 | 3. 💪 [Starting Training](./3.training.md) 27 | Train the Eagle model using your prepared data. 28 | 29 | 4. ✨ [Launching Streamlit Demo](./4.streamlit_demo.md) 30 | Run an interactive Streamlit demo to visualize results. 31 | 32 | 5. 🔮 [Model Inference](./5.inference.md) 33 | Perform inference using the trained model. 
34 | 35 | --- 36 | 37 | ### 📎 Others 38 | 39 | - 📖 [Explain Script Arguments](./explain_script_arguments.md) 40 | - 📖 [How to Use Lmdb](./how_to_use_lmdb_to_read_images.md) 41 | - 📖 [TensorRT-LLM Deployment](../deployment/README.md) -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/_default_template_seg_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCO 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_seg_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/_default_template_seg_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCOplus 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_seg_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/_default_template_bbox_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCO 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_bbox_doc_to_visual 4 | doc_to_text: !function 
utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/_default_template_bbox_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCOg 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_bbox_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/_default_template_seg_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCOg 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_seg_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 
23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/_default_template_bbox_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCOplus 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_bbox_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/scripts/eval/vqav2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | 5 | gpu_list="${CUDA_VISIBLE_DEVICES:-0}" 6 | IFS=',' read -ra GPULIST <<< "$gpu_list" 7 | 8 | CHUNKS=${#GPULIST[@]} 9 | 10 | SPLIT="llava_vqav2_mscoco_test-dev2015" 11 | LOCAL_ANSWER_DIR="./playground/data/eval_local_files/vqav2" 12 | 13 | for IDX in $(seq 0 $((CHUNKS-1))); do 14 | CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m eagle.eval.model_vqa_loader \ 15 | --model-path $CKPT \ 16 | --question-file ./playground/data/eval/vqav2/$SPLIT.jsonl \ 17 | --image-folder ./playground/data/eval/vqav2/test2015 \ 18 | --answers-file ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/${CHUNKS}_${IDX}.jsonl \ 19 | --num-chunks $CHUNKS \ 20 | --chunk-idx $IDX \ 21 | --temperature 0 \ 22 | --conv-mode vicuna_v1 & 23 | done 24 | 25 | wait 26 | 27 | output_file=${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/merge.jsonl 28 | 29 | # Clear out the output file if it exists. 30 | > "$output_file" 31 | 32 | # Loop through the indices and concatenate each file. 
33 | for IDX in $(seq 0 $((CHUNKS-1))); do 34 | cat ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/${CHUNKS}_${IDX}.jsonl >> "$output_file" 35 | done 36 | 37 | python scripts/convert_vqav2_for_submission.py --src ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/merge.jsonl --save_path ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/vqav2-upload-$NAME.json --split $SPLIT --ckpt $NAME -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ai2d/utils.py: -------------------------------------------------------------------------------- 1 | def ai2d_doc_to_text(doc, model_specific_prompt_kwargs=None): 2 | question, choices = doc["question"], doc["options"] 3 | len_choices = len(choices) 4 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 5 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 6 | if model_specific_prompt_kwargs["prompt_format"] == "mcq": 7 | options = [chr(ord("A") + i) for i in range(len_choices)] 8 | choices_str = "\n".join([f"{option}. {choice}" for option, choice in zip(options, choices)]) 9 | return f"{pre_prompt}{question}\n{choices_str}{post_prompt}" 10 | elif model_specific_prompt_kwargs["prompt_format"] == "qa": 11 | options = "\n".join(choices) 12 | return f"{pre_prompt}{question}{options}{post_prompt}" 13 | else: 14 | raise ValueError(f"Unknown prompt format: {model_specific_prompt_kwargs['prompt_format']}") 15 | 16 | 17 | def ai2d_doc_to_visual(doc): 18 | return [doc["image"].convert("RGB")] 19 | 20 | 21 | def ai2d_doc_to_target(doc, model_specific_target_kwargs): 22 | if model_specific_target_kwargs == "mcq": 23 | len_choices = len(doc["options"]) 24 | options = [chr(ord("A") + i) for i in range(len_choices)] 25 | return options[int(doc["answer"])] 26 | elif model_specific_target_kwargs == "qa": 27 | return doc["options"][int(doc["answer"])] 28 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/hallusion_bench/hallusion_bench_image.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/HallusionBench 2 | dataset_kwargs: 3 | token: True 4 | task: "hallusion_bench_image" 5 | test_split: image 6 | output_type: generate_until 7 | doc_to_visual: !function evaluate_hb.hb_doc_to_visual 8 | doc_to_text: !function evaluate_hb.hb_doc_to_text 9 | doc_to_target: "gt_answer_details" 10 | process_results: !function evaluate_hb.hb_process_results 11 | model_specific_prompt_kwargs: 12 | default: 13 | pre_prompt: "" 14 | post_prompt: "" 15 | generation_kwargs: 16 | max_new_tokens: 128 17 | temperature: 0 18 | top_p: 0 19 | num_beams: 1 20 | do_sample: false 21 | metric_list: 22 | - metric: aAcc 23 | aggregation: !function evaluate_hb.hb_aggregation_result_aAcc 24 | higher_is_better: true 25 | - metric: qAcc 26 | aggregation: !function evaluate_hb.hb_aggregation_result_qAcc 27 | higher_is_better: true 28 | - metric: fAcc 29 | aggregation: !function evaluate_hb.hb_aggregation_result_fAcc 30 | higher_is_better: true 31 | # - metric: aAcc 32 | # aggregation: !function evaluate_hb.hb_aggregation_result_aAcc_intern 33 | # higher_is_better: true 34 | # - metric: qAcc 35 | # aggregation: !function evaluate_hb.hb_aggregation_result_qAcc_intern 36 | # higher_is_better: true 37 | # - metric: fAcc 38 | # aggregation: !function evaluate_hb.hb_aggregation_result_fAcc_intern 39 | # higher_is_better: true 40 | metadata: 41 | - version: 0.0 42 | -------------------------------------------------------------------------------- 
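The task YAML files in this directory all follow the lmms_eval contract spelled out in the comments of mme.yaml and pope.yaml: `process_results` returns a dict keyed by metric name (one entry per item in `metric_list`), and each `aggregation` function reduces the collected per-sample values into the reported score. A minimal, hypothetical pair of functions following that contract (the task and metric names here are illustrative and are not files in this repository) might look like:

```python
# Hypothetical utils.py for an lmms_eval-style task; "mytask_accuracy" is an
# illustrative metric name that would appear under metric_list in the YAML.
def mytask_process_results(doc, results):
    pred = results[0].strip().lower()
    target = doc["answer"].strip().lower()
    # One key per metric declared in the task YAML's metric_list.
    return {"mytask_accuracy": 1.0 if pred == target else 0.0}


def mytask_aggregate_accuracy(scores):
    # Receives the list of per-sample values returned above for this metric.
    return sum(scores) / len(scores) if scores else 0.0
```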
/Eagle2_5/document/how_to_use_lmdb_to_read_images.md: -------------------------------------------------------------------------------- 1 | ## original 2 | 3 | ```json 4 | { 5 | "conversations": [ 6 | {"from": "human", "value": " what is this?"}, 7 | {"from": "gpt", "value": "It is an apple."} 8 | ], 9 | "image": "path/to/image.jpg" 10 | } 11 | ``` 12 | 13 | ```python 14 | from PIL import Image 15 | pil_image = Image.open("path/to/image.jpg") 16 | ``` 17 | 18 | 19 | ### lmdb 20 | 21 | ```json 22 | # sample 23 | { 24 | "conversations": [ 25 | {"from": "human", "value": " what is this?"}, 26 | {"from": "gpt", "value": "It is an apple."} 27 | ], 28 | "image": { 29 | "lmdb_file": "path/to/lmdb/file", 30 | "lmdb_key": "image_key" 31 | } 32 | } 33 | ``` 34 | 35 | ```python 36 | import lmdb 37 | import cv2 38 | from PIL import Image 39 | import io 40 | import numpy as np 41 | 42 | image_meta = sample["image"] 43 | lmdb_file = image_meta["lmdb_file"] 44 | lmdb_key = image_meta["lmdb_key"] 45 | env = lmdb.open(lmdb_file, readonly=True, lock=False) 46 | with env.begin(write=False) as txn: 47 | image_bin = txn.get(lmdb_key.encode('ascii')) 48 | buf = io.BytesIO(image_bin) 49 | try: 50 | pil_image = Image.open(buf) 51 | except Exception as e: 52 | image_np = np.frombuffer(image_bin, dtype=np.uint8) 53 | image_bgr = cv2.imdecode(image_np, cv2.IMREAD_COLOR) 54 | image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 55 | pil_image = Image.fromarray(image_rgb) 56 | ``` 57 | -------------------------------------------------------------------------------- /Eagle/scripts/zero3_offload.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "optimizer": { 14 | "type": "AdamW", 15 | "params": { 16 | "lr": "auto", 17 | "betas": "auto", 18 | "eps": "auto", 19 | "weight_decay": "auto" 20 | } 21 | }, 22 | "scheduler": { 23 | "type": "WarmupLR", 24 | "params": { 25 | "warmup_min_lr": "auto", 26 | "warmup_max_lr": "auto", 27 | "warmup_num_steps": "auto" 28 | } 29 | }, 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "gather_16bit_weights_on_model_save": true 49 | }, 50 | "gradient_accumulation_steps": "auto", 51 | "gradient_clipping": "auto", 52 | "train_batch_size": "auto", 53 | "train_micro_batch_size_per_gpu": "auto", 54 | "steps_per_print": 1e5, 55 | "wall_clock_breakdown": false 56 | } -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2017_cap_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/COCO-Caption2017 2 | dataset_kwargs: 3 | token: True 4 | task: "coco2017_cap_val" 5 | group : "coco_caption2017" 6 | test_split: val 7 | output_type: generate_until 8 | doc_to_visual: !function utils.coco_doc_to_visual 9 | doc_to_text: !function utils.coco_doc_to_text 10 | 
doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.coco_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: coco_Bleu_4 21 | aggregation : !function utils.coco_bleu4 22 | higher_is_better : true 23 | - metric: coco_Bleu_3 24 | aggregation : !function utils.coco_bleu3 25 | higher_is_better : true 26 | - metric: coco_Bleu_2 27 | aggregation : !function utils.coco_bleu2 28 | higher_is_better : true 29 | - metric: coco_Bleu_1 30 | aggregation : !function utils.coco_bleu1 31 | higher_is_better : true 32 | - metric: coco_METEOR 33 | aggregation : !function utils.coco_meteor 34 | higher_is_better : true 35 | - metric: coco_ROUGE_L 36 | aggregation : !function utils.coco_rougel 37 | higher_is_better : true 38 | - metric: coco_CIDEr 39 | aggregation : !function utils.coco_cider 40 | higher_is_better : true 41 | #- metric: coco_SPICE 42 | # aggregation : !function utils.coco_spice 43 | # higher_is_better : true 44 | metadata: 45 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/flickr30k/flickr30k_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/flickr30k 2 | dataset_kwargs: 3 | token: True 4 | task : "flickr30k_test" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.flickr_doc_to_visual 8 | doc_to_text: !function utils.flickr_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 64 12 | temperature: 0 13 | top_p: 0 14 | num_beams: 1 15 | do_sample: false 16 | process_results: !function utils.flickr_process_result 17 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 18 | metric_list: 19 | - metric: flickr_Bleu_4 20 | aggregation : !function utils.flickr_bleu4 21 | higher_is_better : true 22 | - metric: flickr_Bleu_3 23 | aggregation : !function utils.flickr_bleu3 24 | higher_is_better : true 25 | - metric: flickr_Bleu_2 26 | aggregation : !function utils.flickr_bleu2 27 | higher_is_better : true 28 | - metric: flickr_Bleu_1 29 | aggregation : !function utils.flickr_bleu1 30 | higher_is_better : true 31 | - metric: flickr_METEOR 32 | aggregation : !function utils.flickr_meteor 33 | higher_is_better : true 34 | - metric: flickr_ROUGE_L 35 | aggregation : !function utils.flickr_rougel 36 | higher_is_better : true 37 | - metric: flickr_CIDEr 38 | aggregation : !function utils.flickr_cider 39 | higher_is_better : true 40 | #- metric: flickr_SPICE 41 | # aggregation : !function utils.flickr_spice 42 | # higher_is_better : true 43 | metadata: 44 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/patch/fused_monkey_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA CORPORATION & AFFILIATES 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | 17 | from .fused_ops.fused_rms_norm import LigerRMSNorm 18 | from .fused_ops.fused_rotary_pos_emb import liger_rotary_pos_emb 19 | from .fused_ops.fused_swiglu import LigerSwiGLUMLP 20 | 21 | 22 | def replace_liger_fused_ops(): 23 | from transformers.models.qwen2 import modeling_qwen2 24 | modeling_qwen2.Qwen2MLP = LigerSwiGLUMLP 25 | modeling_qwen2.Qwen2RMSNorm = LigerRMSNorm 26 | modeling_qwen2.apply_rotary_pos_emb = liger_rotary_pos_emb 27 | 28 | from transformers.models.llama import modeling_llama 29 | modeling_llama.LlamaMLP = LigerSwiGLUMLP 30 | modeling_llama.LlamaRMSNorm = LigerRMSNorm 31 | 32 | from transformers.models.qwen3 import modeling_qwen3 33 | modeling_qwen3.Qwen3MLP = LigerSwiGLUMLP 34 | modeling_qwen3.Qwen3RMSNorm = LigerRMSNorm 35 | modeling_qwen3.apply_rotary_pos_emb = liger_rotary_pos_emb 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2014_cap_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/COCO-Caption 2 | dataset_kwargs: 3 | token: True 4 | task: "coco2014_cap_val" 5 | group : "coco_caption" 6 | test_split: val 7 | output_type: generate_until 8 | doc_to_visual: !function utils.coco_doc_to_visual 9 | doc_to_text: "Provide a one-sentence caption for the provided image." 
10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.coco_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: coco_Bleu_4 21 | aggregation : !function utils.coco_bleu4 22 | higher_is_better : true 23 | - metric: coco_Bleu_3 24 | aggregation : !function utils.coco_bleu3 25 | higher_is_better : true 26 | - metric: coco_Bleu_2 27 | aggregation : !function utils.coco_bleu2 28 | higher_is_better : true 29 | - metric: coco_Bleu_1 30 | aggregation : !function utils.coco_bleu1 31 | higher_is_better : true 32 | - metric: coco_METEOR 33 | aggregation : !function utils.coco_meteor 34 | higher_is_better : true 35 | - metric: coco_ROUGE_L 36 | aggregation : !function utils.coco_rougel 37 | higher_is_better : true 38 | - metric: coco_CIDEr 39 | aggregation : !function utils.coco_cider 40 | higher_is_better : true 41 | #- metric: coco_SPICE 42 | # aggregation : !function utils.coco_spice 43 | # higher_is_better : true 44 | metadata: 45 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/scripts/eval/gqa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | 5 | gpu_list="${CUDA_VISIBLE_DEVICES:-0}" 6 | IFS=',' read -ra GPULIST <<< "$gpu_list" 7 | 8 | CHUNKS=${#GPULIST[@]} 9 | 10 | SPLIT="llava_gqa_testdev_balanced" 11 | GQADIR="./playground/data/eval/gqa/data" 12 | LOCAL_ANSWER_DIR="./playground/data/eval_local_files/gqa" 13 | 14 | for IDX in $(seq 0 $((CHUNKS-1))); do 15 | CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m eagle.eval.model_vqa_loader \ 16 | --model-path $CKPT \ 17 | --question-file ./playground/data/eval/gqa/$SPLIT.jsonl \ 18 | --image-folder ./playground/data/eval/gqa/data/images \ 19 | --answers-file ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/${CHUNKS}_${IDX}.jsonl \ 20 | --num-chunks $CHUNKS \ 21 | --chunk-idx $IDX \ 22 | --temperature 0 \ 23 | --conv-mode vicuna_v1 & 24 | done 25 | 26 | wait 27 | 28 | output_file=${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/merge.jsonl 29 | 30 | # Clear out the output file if it exists. 31 | > "$output_file" 32 | 33 | # Loop through the indices and concatenate each file. 
34 | for IDX in $(seq 0 $((CHUNKS-1))); do 35 | cat ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/${CHUNKS}_${IDX}.jsonl >> "$output_file" 36 | done 37 | 38 | python scripts/convert_gqa_for_eval.py --src $output_file --dst ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/testdev_balanced_predictions.json 39 | absolute_path=$(readlink -f "${LOCAL_ANSWER_DIR}/$SPLIT/$NAME") 40 | 41 | cd $GQADIR 42 | # python eval/eval.py --predictions ${LOCAL_ANSWER_DIR}/$SPLIT/$name/{tier}_predictions.json --tier testdev_balanced 43 | python eval.py --predictions ${absolute_path}/{tier}_predictions.json --tier testdev_balanced -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/nocaps/nocaps_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/NoCaps 2 | dataset_kwargs: 3 | token: True 4 | task: "nocaps_val" 5 | group : "nocaps_caption" 6 | test_split: validation 7 | output_type: generate_until 8 | doc_to_visual: !function utils.nocaps_doc_to_visual 9 | doc_to_text: !function utils.nocaps_doc_to_text 10 | doc_to_target: "annotations_captions" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.nocaps_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: nocaps_Bleu_4 21 | aggregation : !function utils.nocaps_bleu4 22 | higher_is_better : true 23 | - metric: nocaps_Bleu_3 24 | aggregation : !function utils.nocaps_bleu3 25 | higher_is_better : true 26 | - metric: nocaps_Bleu_2 27 | aggregation : !function utils.nocaps_bleu2 28 | higher_is_better : true 29 | - metric: nocaps_Bleu_1 30 | aggregation : !function utils.nocaps_bleu1 31 | higher_is_better : true 32 | - metric: nocaps_METEOR 33 | aggregation : !function utils.nocaps_meteor 34 | higher_is_better : true 35 | - metric: nocaps_ROUGE_L 36 | aggregation : !function utils.nocaps_rougel 37 | higher_is_better : true 38 | - metric: nocaps_CIDEr 39 | aggregation : !function utils.nocaps_cider 40 | higher_is_better : true 41 | #- metric: nocaps_SPICE 42 | # aggregation : !function utils.nocaps_spice 43 | # higher_is_better : true 44 | metadata: 45 | - version: 0.0 46 | include: _default_template_nocaps_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/__init__.py: -------------------------------------------------------------------------------- 1 | from lmms_eval.api.filter import FilterEnsemble 2 | from . import selection 3 | from . import extraction 4 | from . import transformation 5 | 6 | 7 | FILTER_REGISTRY = { 8 | "take_first": selection.TakeFirstFilter, 9 | "regex": extraction.RegexFilter, 10 | "majority_vote": selection.MajorityVoteFilter, 11 | "take_first_k": selection.TakeKFilter, 12 | "remove_whitespace": extraction.WhitespaceFilter, 13 | "lowercase": transformation.LowercaseFilter, 14 | "uppercase": transformation.UppercaseFilter, 15 | "map": transformation.MapFilter, 16 | # TODO: implement this filter. either it should take in an arbitrary "scoring"/reward function 17 | # that takes an input and returns a scalar and then should select the max reward, 18 | # or should implement different filters for different ways of handling a reward model's inference. 
19 | # "arg_max": selection.ArgMaxFilter, 20 | } 21 | 22 | 23 | def get_filter(filter_name): 24 | if filter_name in FILTER_REGISTRY: 25 | return FILTER_REGISTRY[filter_name] 26 | else: 27 | return filter_name 28 | 29 | 30 | def build_filter_ensemble(filter_name, components): 31 | """ 32 | Create a filtering pipeline. 33 | """ 34 | filters = [] 35 | for function, kwargs in components: 36 | if kwargs is None: 37 | f = get_filter(function)() 38 | else: 39 | # create a filter given its name in the registry 40 | f = get_filter(function)(**kwargs) # TODO: pass kwargs to filters properly 41 | # add the filter as a pipeline step 42 | filters.append(f) 43 | 44 | return FilterEnsemble(name=filter_name, filters=filters) 45 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/textcaps_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/TextCaps 2 | dataset_kwargs: 3 | token: True 4 | task: "textcaps_val" 5 | group : "textcaps_caption" 6 | test_split: val 7 | output_type: generate_until 8 | doc_to_visual: !function utils.textcaps_doc_to_visual 9 | doc_to_text: !function utils.textcaps_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.textcaps_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: textcaps_Bleu_4 21 | aggregation : !function utils.textcaps_bleu4 22 | higher_is_better : true 23 | - metric: textcaps_Bleu_3 24 | aggregation : !function utils.textcaps_bleu3 25 | higher_is_better : true 26 | - metric: textcaps_Bleu_2 27 | aggregation : !function utils.textcaps_bleu2 28 | higher_is_better : true 29 | - metric: textcaps_Bleu_1 30 | aggregation : !function utils.textcaps_bleu1 31 | higher_is_better : true 32 | - metric: textcaps_METEOR 33 | aggregation : !function utils.textcaps_meteor 34 | higher_is_better : true 35 | - metric: textcaps_ROUGE_L 36 | aggregation : !function utils.textcaps_rougel 37 | higher_is_better : true 38 | - metric: textcaps_CIDEr 39 | aggregation : !function utils.textcaps_cider 40 | higher_is_better : true 41 | #- metric: textcaps_SPICE 42 | # aggregation : !function utils.textcaps_spice 43 | # higher_is_better : true 44 | metadata: 45 | - version: 0.0 46 | include: _default_template_textcaps_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/transformation.py: -------------------------------------------------------------------------------- 1 | from lmms_eval.api.filter import Filter 2 | 3 | 4 | class LowercaseFilter(Filter): 5 | def __init__(self) -> None: 6 | pass 7 | 8 | def apply(self, resps, docs): 9 | def filter_set(inst): 10 | return [resp.lower() for resp in inst] 11 | 12 | return [filter_set(resp) for resp in resps] 13 | 14 | 15 | class UppercaseFilter(Filter): 16 | def __init__(self) -> None: 17 | pass 18 | 19 | def apply(self, resps, docs): 20 | def filter_set(inst): 21 | return [resp.upper() for resp in inst] 22 | 23 | return [filter_set(resp) for resp in resps] 24 | 25 | 26 | class MapFilter(Filter): 27 | def __init__(self, mapping_dict: dict = {}, default_value=None) -> None: 28 | """ 29 | Initializes the MapFilter with a given mapping dictionary and default value. 
30 | 31 | Args: 32 | - mapping_dict (dict): A dictionary containing the key-value mappings. 33 | Default is an empty dictionary. 34 | - default_value (Any): The value to be returned when a key is not found in the mapping_dict. 35 | Default is None. 36 | 37 | Example: 38 | mapper = MapFilter({'A': 1, 'B': 2}, default_value=0) 39 | """ 40 | assert isinstance(mapping_dict, dict), "Provided mapping_dict is not a dictionary" 41 | self.mapping_dict = mapping_dict 42 | self.default_value = default_value 43 | 44 | def apply(self, resps, docs): 45 | def filter_set(inst): 46 | return [self.mapping_dict.get(resp, self.default_value) for resp in inst] 47 | 48 | return [filter_set(resp) for resp in resps] 49 | -------------------------------------------------------------------------------- /Eagle/scripts/convert_vizwiz_for_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import json 4 | 5 | # for debug 6 | import sys 7 | sys.path.append(os.getcwd()) 8 | 9 | from eagle.eval.m4c_evaluator import EvalAIAnswerProcessor 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--annotation-file', type=str, required=True) 15 | parser.add_argument('--result-file', type=str, required=True) 16 | parser.add_argument('--result-upload-file', type=str, required=True) 17 | return parser.parse_args() 18 | 19 | 20 | if __name__ == '__main__': 21 | 22 | args = parse_args() 23 | 24 | os.makedirs(os.path.dirname(args.result_upload_file), exist_ok=True) 25 | 26 | results = [] 27 | error_line = 0 28 | for line_idx, line in enumerate(open(args.result_file)): 29 | try: 30 | results.append(json.loads(line)) 31 | except: 32 | error_line += 1 33 | results = {x['question_id']: x['text'] for x in results} 34 | test_split = [json.loads(line) for line in open(args.annotation_file)] 35 | split_ids = set([x['question_id'] for x in test_split]) 36 | 37 | print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}') 38 | 39 | all_answers = [] 40 | 41 | answer_processor = EvalAIAnswerProcessor() 42 | 43 | for x in test_split: 44 | assert x['question_id'] in results 45 | all_answers.append({ 46 | 'image': x['image'], 47 | 'answer': answer_processor(results[x['question_id']]) 48 | }) 49 | 50 | with open(args.result_upload_file, 'w') as f: 51 | json.dump(all_answers, f) 52 | 53 | print(f"successfully saving results to {args.result_upload_file}") -------------------------------------------------------------------------------- /Eagle2_5/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "eagle_vl" 7 | version = "2.5" 8 | description = "Eagle 2.5: Boosting Long-Context Post-Training for Frontier Vision-Language Models" 9 | readme = "README.md" 10 | requires-python = ">=3.8" 11 | classifiers = [ 12 | "Programming Language :: Python :: 3", 13 | "License :: OSI Approved :: Apache Software License", 14 | ] 15 | 16 | dependencies = ["transformers==4.51.0", "tokenizers==0.21.1", "sentencepiece==0.2.0", "shortuuid", 17 | "accelerate==1.5.2", "peft==0.12.0", "bitsandbytes", "wandb==0.17.7", 18 | "pydantic==2.10.6", "markdown2[all]", "numpy==1.26.4", "scikit-learn>=1.2.2", 19 | "gradio", "gradio_client", "lmdb", 20 | "requests", "httpx==0.28.1", "uvicorn", "fastapi", "streamlit", "streamlit-image-select", 21 | "einops", 
"einops-exts", "timm==1.0.11", "deepspeed==0.16.5", "av", "liger_kernel==0.3.1", "filetype", "bitstring", 22 | "ebmlite", "triton==3.1.0", "scipy>=1.10.0", "datasets==2.21.0", "dotenv", "decord", "scikit-image", "imagehash", "swanlab", 23 | "torchcodec" 24 | ] 25 | 26 | 27 | 28 | [project.urls] 29 | "Homepage" = "https://nvlabs.github.io/EAGLE/" 30 | "Bug Tracker" = "https://github.com/NVlabs/EAGLE/issues" 31 | 32 | [tool.setuptools.packages.find] 33 | exclude = ["tools", "data*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "shell*", "work_dirs*", "pretrained*", "vid_playground*", "wandb*", "vlmeval*", "internel*", "streamlit*"] 34 | 35 | [tool.wheel] 36 | exclude = ["tools", "data*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "shell*", "work_dirs*", "pretrained*", "vid_playground*", "wandb*", "vlmeval*", "internel*", "streamlit*"] 37 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/textcaps_train.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/TextCaps 2 | dataset_kwargs: 3 | token: True 4 | task : "textcaps_train" 5 | group : "textcaps_caption" 6 | test_split: train 7 | output_type: generate_until 8 | doc_to_visual: !function utils.textcaps_doc_to_visual 9 | doc_to_text: !function utils.textcaps_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | until: 13 | - "ASSISTANT:" 14 | max_new_tokens: 1024 15 | temperature: 0 16 | top_p: 0 17 | num_beams: 1 18 | do_sample: false 19 | process_results: !function utils.textcaps_process_result 20 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 21 | metric_list: 22 | - metric: textcaps_Bleu_4 23 | aggregation : !function utils.textcaps_bleu4 24 | higher_is_better : true 25 | - metric: textcaps_Bleu_3 26 | aggregation : !function utils.textcaps_bleu3 27 | higher_is_better : true 28 | - metric: textcaps_Bleu_2 29 | aggregation : !function utils.textcaps_bleu2 30 | higher_is_better : true 31 | - metric: textcaps_Bleu_1 32 | aggregation : !function utils.textcaps_bleu1 33 | higher_is_better : true 34 | - metric: textcaps_METEOR 35 | aggregation : !function utils.textcaps_meteor 36 | higher_is_better : true 37 | - metric: textcaps_ROUGE_L 38 | aggregation : !function utils.textcaps_rougel 39 | higher_is_better : true 40 | - metric: textcaps_CIDEr 41 | aggregation : !function utils.textcaps_cider 42 | higher_is_better : true 43 | #- metric: textcaps_SPICE 44 | # aggregation : !function utils.textcaps_spice 45 | # higher_is_better : true 46 | metadata: 47 | - version: 0.0 48 | include: _default_template_textcaps_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/selection.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | 3 | from lmms_eval.api.filter import Filter 4 | 5 | 6 | class TakeFirstFilter(Filter): 7 | def __init__(self) -> None: 8 | """ 9 | Can define custom behavior here, if an individual instantiation of a Filter class should have state. 10 | """ 11 | 12 | def apply(self, resps, docs): 13 | """ 14 | Assuming each entry of `resps` is a list of model responses, we discard all but the first response. 
15 | """ 16 | return map(lambda r: r[0], resps) 17 | 18 | 19 | class TakeKFilter(Filter): 20 | def __init__(self, *args, **kwargs) -> None: 21 | self.k = kwargs.pop("k") 22 | 23 | super().__init__(*args, **kwargs) 24 | 25 | def apply(self, resps, docs): 26 | # check we have at least k responses per doc, else we can't take the first k 27 | assert len(resps[0]) >= self.k, f"Need at least {self.k} responses per doc to take first {self.k}, but got {len(resps[0])} only! Please increase TaskConfig.repeats ." 28 | return map(lambda r: r[: self.k], resps) 29 | 30 | 31 | class MajorityVoteFilter(Filter): 32 | def __init__(self) -> None: 33 | """ 34 | Can define custom behavior here, if an individual instantiation of a Filter class should have state. 35 | """ 36 | 37 | def apply(self, resps, docs): 38 | """ 39 | Each entry of `resps` is a list of model responses. 40 | We select the response that occurs most frequently in each entry of `resps`. 41 | """ 42 | 43 | def select_majority(resp): 44 | counts = Counter(resp) 45 | vote = counts.most_common(1)[0][0] 46 | return vote 47 | 48 | return map(lambda r: [select_majority(r)], resps) 49 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x4-vicuna-13b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-13b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x4-vicuna-7b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x5-vicuna-7b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain_eagle_x5_vicuna_7b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x5-vicuna-13b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-13b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x5-llama3-8b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x4-vicuna-13b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path lmsys/vicuna-13b-v1.5 \ 19 | --version v1 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;det-1024;pix2struct-1024" \ 23 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. 
\ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x4-vicuna-7b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 19 | --version v1 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;det-1024;pix2struct-1024" \ 23 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. 
\ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x5-vicuna-7b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 19 | --version v1 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 23 | --mm_projector_type mlp2x_gelu \ 24 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. 
\ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x5-vicuna-13b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path lmsys/vicuna-13b-v1.5 \ 19 | --version v1 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 23 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. \ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x5-yi34b-cambrian.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | # We use 128 GPUs to pretrain the Yi-34B model; the total batch size is 1024 (128 GPUs x per-device batch size 8).
4 | 5 | # export WANDB_DISABLED="true" 6 | export WANDB_PROJECT="eagle" 7 | export WANDB_RUN_ID=${NAME} 8 | export WANDB_RESUME="allow" 9 | 10 | echo "MASTER_ADDR=$MASTER_ADDR" 11 | n_node=$SLURM_JOB_NUM_NODES 12 | echo "number of nodes:" $n_node 13 | echo "node rank:" $SLURM_PROCID 14 | 15 | python -m torch.distributed.run \ 16 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 17 | --master_addr $MASTER_ADDR --master_port 25031 \ 18 | train_mem.py \ 19 | --deepspeed ./scripts/zero3.json \ 20 | --model_name_or_path NousResearch/Nous-Hermes-2-Yi-34B \ 21 | --version plain \ 22 | --data_path $PATH_TO_CAMBRIAN_PRETRAINING_DATA \ 23 | --image_folder $PATH_TO_CAMBRIAN_PRETRAINING_DATA \ 24 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 25 | --mm_projector_type mlp2x_gelu \ 26 | --tune_mm_mlp_adapter True \ 27 | --mm_vision_select_layer -2 \ 28 | --mm_use_im_start_end False \ 29 | --mm_use_im_patch_token False \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 8 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 24000 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-4 \ 41 | --weight_decay 0. \ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x5-llama3-8b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \ 19 | --version llama3 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 23 | --mm_projector_type mlp2x_gelu \ 24 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. 
\ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/scienceqa/utils.py: -------------------------------------------------------------------------------- 1 | def sqa_doc_to_text(doc, model_specific_prompt_kwargs=None): 2 | context, question, choices = doc["hint"], doc["question"], doc["choices"] 3 | len_choices = len(choices) 4 | options = [chr(ord("A") + i) for i in range(len_choices)] 5 | choices_str = "\n".join([f"{option}. {choice}" for option, choice in zip(options, choices)]) 6 | if model_specific_prompt_kwargs["format"] == "default": 7 | if context: 8 | context = f"Context: {context}\n" 9 | 10 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 11 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 12 | return f"{pre_prompt}{context}{question}\n{choices_str}{post_prompt}" 13 | elif model_specific_prompt_kwargs["format"] == "qwen_vl": 14 | prompt = "Context: {}\nQuestion: {}\nOptions: {}\nAnswer:" 15 | context = context if context else "N/A" 16 | prompt = prompt.format(context, question, choices_str) 17 | return prompt 18 | else: 19 | raise ValueError(f"Unknown prompt format: {model_specific_prompt_kwargs}") 20 | 21 | 22 | def sqa_doc_to_visual(doc): 23 | if doc["image"] is None: 24 | return [] 25 | return [doc["image"].convert("RGB")] 26 | 27 | 28 | def sqa_doc_to_target(doc): 29 | len_choices = len(doc["choices"]) 30 | options = [chr(ord("A") + i) for i in range(len_choices)] 31 | return options[doc["answer"]] 32 | 33 | 34 | def sqa_process_results(doc, results): 35 | # I know this is weird, but it's how llava parses it. 36 | target = sqa_doc_to_target(doc) 37 | pred = results[0] 38 | if pred == target: 39 | return {"exact_match": 1.0} 40 | # pattern: ^[A-Z]\.
.* 41 | if len(pred) >= 2 and pred[0].isupper() and pred[1] == ".": 42 | result = 1.0 if pred[0] == target else 0.0 43 | return {"exact_match": result} 44 | return {"exact_match": 0.0} 45 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench_2/seedbench_2.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/SEED-Bench-2 2 | dataset_kwargs: 3 | token: True 4 | task: "seedbench-2" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.seed_doc_to_visual 8 | doc_to_text: !function utils.seed_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 16 14 | image_aspect_ratio: original 15 | # The return value of process_results will be used by metrics 16 | process_results: !function utils.seed_process_result 17 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results 18 | metric_list: 19 | - metric: seed_Video 20 | aggregation: !function utils.seed_aggregation_result 21 | higher_is_better: true 22 | - metric: seed_Multiple_Images 23 | aggregation: !function utils.seed_aggregation_result 24 | higher_is_better: true 25 | - metric: seed_Image_&_Text_Generation 26 | aggregation: !function utils.seed_aggregation_result 27 | higher_is_better: true 28 | - metric: seed_Single_Image 29 | aggregation: !function utils.seed_aggregation_result 30 | higher_is_better: true 31 | - metric: seed_Image_Generation 32 | aggregation: !function utils.seed_aggregation_result 33 | higher_is_better: true 34 | - metric: seed_Interleaved_Image 35 | aggregation: !function utils.seed_aggregation_result 36 | higher_is_better: true 37 | - metric: seed_all 38 | aggregation: !function utils.seed_aggregation_result 39 | higher_is_better: true 40 | metadata: 41 | - version: 0.0 42 | 43 | model_specific_prompt_kwargs: 44 | llava : 45 | img_token : <image> 46 | post_prompt : "Answer with the option's letter from the given choices directly." 47 | gpt4V : 48 | img_token : <image> 49 | post_prompt : "Answer with the option's letter from the given choices directly."
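The two comments in this YAML describe the contract between `process_results` and `metric_list`: `process_results` returns one dict per sample whose keys are metric names, and each `aggregation` function later receives the list of those per-sample values. A minimal runnable sketch of that contract, using made-up docs and responses rather than real SEED-Bench-2 data:

```python
# Minimal sketch of the process_results -> aggregation contract described above.
# The docs and responses here are made up; they are not real SEED-Bench-2 samples.

def toy_process_result(doc, result):
    # Return one dict per sample; each key must match a `metric` name in metric_list.
    pred = result[0].strip()
    if len(pred) > 1:
        pred = pred[0]
    return {"seed_all": {"pred": pred, "answer": doc["answer"], "question_id": doc["question_id"]}}

def toy_aggregation(results):
    # Receives the list of per-sample values stored under one metric key.
    return sum(r["pred"] == r["answer"] for r in results) / len(results)

samples = [
    ({"answer": "A", "question_id": 1}, ["A. a red cube"]),
    ({"answer": "C", "question_id": 2}, ["B"]),
]
per_sample = [toy_process_result(doc, res)["seed_all"] for doc, res in samples]
print(toy_aggregation(per_sample))  # 0.5
```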
-------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x5-yi-34b-cambrian-7m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # We fine-tune this model on 256 A100 GPUs 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero3.json \ 19 | --model_name_or_path NousResearch/Nous-Hermes-2-Yi-34B \ 20 | --version yi_34b_chatml_direct \ 21 | --data_path $PATH_TO_CAMBRIAN_SFT_DATA \ 22 | --image_folder $PATH_TO_CAMBRIAN_SFT_DATA \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 25 | --mm_projector_type mlp2x_gelu \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --image_aspect_ratio pad \ 30 | --group_by_modality_length True \ 31 | --bf16 True \ 32 | --output_dir ./checkpoints/$NAME \ 33 | --num_train_epochs 1 \ 34 | --per_device_train_batch_size 4 \ 35 | --per_device_eval_batch_size 4 \ 36 | --gradient_accumulation_steps 1 \ 37 | --evaluation_strategy "no" \ 38 | --save_strategy "steps" \ 39 | --save_steps 500 \ 40 | --save_total_limit 1 \ 41 | --learning_rate 2e-5 \ 42 | --weight_decay 0. \ 43 | --warmup_ratio 0.03 \ 44 | --lr_scheduler_type "cosine" \ 45 | --logging_steps 1 \ 46 | --tf32 True \ 47 | --model_max_length 2048 \ 48 | --gradient_checkpointing True \ 49 | --dataloader_num_workers 4 \ 50 | --lazy_preprocess True \ 51 | --report_to wandb \ 52 | --run_name ${NAME} 53 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/extraction.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from lmms_eval.api.filter import Filter 4 | 5 | 6 | class RegexFilter(Filter): 7 | """ """ 8 | 9 | def __init__(self, regex_pattern: str = r"#### (\-?[0-9\.\,]+)", fallback: str = "[invalid]") -> None: 10 | """ 11 | pass a string `regex` to run `re.compile(r"regex")` on. 12 | `fallback` defines the output returned if no matches for the regex are located. 13 | """ 14 | self.regex_pattern = regex_pattern 15 | self.regex = re.compile(regex_pattern) 16 | self.fallback = fallback 17 | 18 | def apply(self, resps, docs): 19 | # here, we assume we have a list, in which each element is 20 | # a list of model responses for some particular input/target pair. 21 | # so we process each of these (same input/target response sets) 22 | # independently (and keep them a list.)
23 | def filter_set(inst): 24 | filtered = [] 25 | for resp in inst: 26 | match = self.regex.search(resp) 27 | if match: 28 | match = match.group(1).strip() 29 | else: 30 | match = self.fallback 31 | filtered.append(match) 32 | return filtered 33 | 34 | # print(resps) 35 | filtered_resps = list(map(lambda x: filter_set(x), resps)) 36 | # print(filtered_resps) 37 | 38 | return filtered_resps 39 | 40 | 41 | class WhitespaceFilter(Filter): 42 | """ """ 43 | 44 | def __init__(self) -> None: 45 | pass 46 | 47 | def apply(self, resps, docs): 48 | def filter_set(inst): 49 | filtered_resp = [] 50 | for resp in inst: 51 | if resp.startswith(" "): 52 | resp = resp[1:] 53 | 54 | filtered_resp.append(resp) 55 | 56 | return filtered_resp 57 | 58 | filtered_resps = [filter_set(resp) for resp in resps] 59 | 60 | return filtered_resps 61 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/sd_worker.py: -------------------------------------------------------------------------------- 1 | # This file is adopted from the InternVL project 2 | # (https://github.com/OpenGVLab/InternVL), licensed under the MIT License. 3 | # 4 | # -------------------------------------------------------- 5 | # InternVL 6 | # Copyright (c) 2023 OpenGVLab 7 | # Licensed under The MIT License 8 | # -------------------------------------------------------- 9 | 10 | from io import BytesIO 11 | 12 | import torch 13 | from diffusers import StableDiffusion3Pipeline 14 | from fastapi import FastAPI 15 | from fastapi.responses import Response 16 | from pydantic import BaseModel 17 | 18 | # Initialize pipeline 19 | pipe = StableDiffusion3Pipeline.from_pretrained('stabilityai/stable-diffusion-3-medium-diffusers', 20 | torch_dtype=torch.float16) 21 | pipe = pipe.to('cuda') 22 | 23 | # Create a FastAPI application 24 | app = FastAPI() 25 | 26 | 27 | # Define the input data model 28 | class CaptionRequest(BaseModel): 29 | caption: str 30 | 31 | 32 | # Defining API endpoints 33 | @app.post('/generate_image/') 34 | async def generate_image(request: CaptionRequest): 35 | caption = request.caption 36 | negative_prompt = 'blurry, low resolution, artifacts, unnatural, poorly drawn, bad anatomy, out of focus' 37 | image = pipe( 38 | caption, 39 | negative_prompt=negative_prompt, 40 | num_inference_steps=20, 41 | guidance_scale=7.0 42 | ).images[0] 43 | 44 | # Converts an image to a byte stream 45 | img_byte_arr = BytesIO() 46 | image.save(img_byte_arr, format='PNG') 47 | img_byte_arr = img_byte_arr.getvalue() 48 | 49 | return Response(content=img_byte_arr, media_type='image/png') 50 | 51 | 52 | # Run the Uvicorn server 53 | if __name__ == '__main__': 54 | import argparse 55 | 56 | import uvicorn 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('--port', default=11005, type=int) 59 | args = parser.parse_args() 60 | 61 | uvicorn.run(app, host='0.0.0.0', port=args.port) 62 | -------------------------------------------------------------------------------- /Eagle2_5/document/1.installing.md: -------------------------------------------------------------------------------- 1 | # Installation Guide for Eagle2.5 2 | 3 | 4 | **Step 1: Create a New Conda Environment** *(If Docker not used)* 5 | 6 | Create an environment named `eagle` with Python 3.10: 7 | ```bash 8 | conda create -n eagle python=3.10 9 | conda activate eagle 10 | ``` 11 | 12 | --- 13 | 14 | **Step 2: Install PyTorch and Flash Attention** *(If Docker not used; match your CUDA)* 15 | 16 | Install PyTorch and Flash Attention: 17 
| ```bash 18 | pip install torch==2.5.0 --index-url https://download.pytorch.org/whl/cu124 19 | pip install flash-attn==2.4.2 --no-build-isolation 20 | ``` 21 | 22 | (optional) Flash-Attention 3 for Hopper 23 | ``` 24 | # Flash-Attention 3 for Hopper 25 | git clone https://github.com/Dao-AILab/flash-attention 26 | cd flash-attention/hopper 27 | python setup.py install 28 | ``` 29 | --- 30 | 31 | **Step 3: Install Eagle2.5** 32 | 33 | Clone and install: 34 | ```bash 35 | # Clone the repository 36 | git clone -b main --single-branch https://github.com/NVlabs/Eagle.git 37 | 38 | # Navigate to the working directory and install 39 | cd Eagle/Eagle2_5 40 | pip install -e . 41 | ``` 42 | 43 | --- 44 | 45 | 46 | ## 🚨 Troubleshooting Common Issues 47 | 48 | ### Transformers Installation 49 | 50 | The project requires `transformers==4.51.0` as specified in `pyproject.toml`. If you encounter issues installing this version, follow these steps: 51 | 52 | 1. Remove the transformers dependency `"transformers"==4.51.0` from `pyproject.toml` 53 | 2. Install transformers manually using the following commands: 54 | ```bash 55 | git clone https://github.com/huggingface/transformers.git 56 | cd transformers 57 | git checkout 279c2e302ae4993986d6681c5885990c55eb5972 58 | pip install -e . 59 | ``` 60 | 61 | ### 🐞 OpenCV-related Issues 62 | 63 | - **Error:** `ImportError: libGL.so.1: cannot open shared object file` 64 | 65 | Fix by installing `libgl1`: 66 | ```bash 67 | sudo apt update 68 | sudo apt install libgl1 69 | ``` 70 | 71 | ✅ **Setup complete!** You're now ready to use Eagle2.5. 72 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_projector/builder.py: -------------------------------------------------------------------------------- 1 | # This file is derived from the LLaVA project 2 | # (https://github.com/haotian-liu/LLaVA), which is licensed under 3 | # the Apache License, Version 2.0. 
4 | # 5 | # -------------------------------------------------------- 6 | # LLaVA 7 | # Copyright (c) 2023 Haotian Liu 8 | # Licensed under the Apache License, Version 2.0 9 | # -------------------------------------------------------- 10 | 11 | import torch 12 | import torch.nn as nn 13 | import re 14 | 15 | class IdentityMap(nn.Module): 16 | def __init__(self): 17 | super().__init__() 18 | 19 | def forward(self, x, *args, **kwargs): 20 | return x 21 | 22 | @property 23 | def config(self): 24 | return {"mm_projector_type": 'identity'} 25 | 26 | 27 | class SimpleResBlock(nn.Module): 28 | def __init__(self, channels): 29 | super().__init__() 30 | self.pre_norm = nn.LayerNorm(channels) 31 | 32 | self.proj = nn.Sequential( 33 | nn.Linear(channels, channels), 34 | nn.GELU(), 35 | nn.Linear(channels, channels) 36 | ) 37 | def forward(self, x): 38 | x = self.pre_norm(x) 39 | return x + self.proj(x) 40 | 41 | 42 | def build_vision_projector(config, delay_load=False, fpn_input_dim=[], **kwargs): 43 | projector_type = getattr(config, 'mm_projector_type', 'linear') 44 | 45 | if projector_type == 'linear': 46 | return nn.Linear(config.mm_hidden_size, config.hidden_size) 47 | 48 | mlp_gelu_match = re.match(r'^mlp(\d+)x_gelu$', projector_type) 49 | if mlp_gelu_match: 50 | mlp_depth = int(mlp_gelu_match.group(1)) 51 | modules = [nn.Linear(config.mm_hidden_size, config.hidden_size)] 52 | for _ in range(1, mlp_depth): 53 | modules.append(nn.GELU()) 54 | modules.append(nn.Linear(config.hidden_size, config.hidden_size)) 55 | return nn.Sequential(*modules) 56 | 57 | if projector_type == 'identity': 58 | return IdentityMap() 59 | 60 | raise ValueError(f'Unknown projector type: {projector_type}') 61 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def seed_doc_to_visual(doc): 5 | return [image.convert("RGB") for image in doc["image"]] 6 | 7 | 8 | def seed_doc_to_text(doc): 9 | question = doc["question"] 10 | question += "\n" + f"A. {doc['choice_a']}\n" 11 | question += f"B. {doc['choice_b']}\n" 12 | question += f"C. {doc['choice_c']}\n" 13 | question += f"D. {doc['choice_d']}" 14 | return f"{question}\nAnswer with the option's letter from the given choices directly." 
15 | 16 | 17 | def seed_process_result(doc, result): 18 | pred = result[0].strip() 19 | if len(pred) > 1: 20 | pred = pred[0] 21 | answer = doc["answer"] 22 | data_type = doc["data_type"] 23 | 24 | return {f"seed_{data_type}": {"pred": pred, "answer": answer, "question_id": doc["question_id"]}, f"seed_all": {"pred": pred, "answer": answer, "question_id": doc["question_id"]}} 25 | 26 | 27 | def seed_aggregation_result(results): 28 | total_count = 0 29 | total_correct = 0 30 | for result in results: 31 | if result["pred"] == result["answer"]: 32 | total_correct += 1 33 | total_count += 1 34 | return total_correct / total_count 35 | 36 | 37 | def seed_aggregation_result_all(results): 38 | score = seed_aggregation_result(results) 39 | stored_results = [] 40 | for result in results: 41 | stored_results.append({"question_id": result["question_id"], "prediction": result["pred"]}) 42 | with open("./seed_submission.json", "w") as f: 43 | json.dump(stored_results, f, indent=4) 44 | print("Storing files for seed_submission ...") 45 | 46 | return score 47 | 48 | 49 | def seed_doc_to_text_mc(doc): 50 | question = doc["question"] 51 | return f"{question} Answer :" 52 | 53 | 54 | def seed_doc_to_choice(doc): 55 | return [doc["choice_a"], doc["choice_b"], doc["choice_c"], doc["choice_d"]] 56 | 57 | 58 | def seed_doc_to_mc_target(doc): 59 | answer2choice = {"A": "choice_a", "B": "choice_b", "C": "choice_c", "D": "choice_d"} 60 | return doc[answer2choice[doc["answer"]]] 61 | -------------------------------------------------------------------------------- /Eagle2_5/document/3.training.md: -------------------------------------------------------------------------------- 1 | # 🚀 Starting Eagle Training 2 | 3 | ### ✅ Prerequisites 4 | 5 | Make sure you have completed these steps: 6 | 7 | - [1. Setting Environment Variables](./1.setting_env_file.md) 8 | - [2. Installing Eagle Environment](./1.installing.md) 9 | - [3. Preparing Playground Data](./2.preparing_playground.md) 10 | 11 | ### 🟢 Launching Stage-1 Training 12 | Typically, you only need to train on top of our pre-trained model. If you want to start from a base LLM and a Vision Encoder, 13 | please refer to this script: [stage-1 training](./stage_1_training.md) 14 | 15 | ### 🟢 Launching Finetuning 16 | 17 | Run the following command to start training: 18 | 19 | ```bash 20 | GPUS=8 bash shell/train_stage2.sh 1 work_dirs/eagle2.5_debug 21 | ``` 22 | 23 | If everything goes smoothly, your first step log should look like this: 24 | 25 | ```bash 26 | [2025-04-03 06:56:30,930] [INFO] [logging.py:128:log_dist] [Rank 0] time (ms) | optimizer_step: 11.44 27 | [2025-04-03 06:56:30,931] [INFO] [logging.py:128:log_dist] [Rank 0] time (ms) | fwd_microstep: 1148.25 | bwd_microstep: 7335.70 | bwd_inner_microstep: 1056.00 | bwd_allreduce_microstep: 6279.59 | step_microstep: 71.91 28 | [2025-04-03 06:56:30,931] [INFO] [logging.py:128:log_dist] [Rank 0] time (ms) | fwd: 1148.25 | bwd: 7335.70 | bwd_inner: 1056.03 | bwd_allreduce: 6279.59 | step: 71.91 29 | [Step 1 | Rank 0 / GPU 0] Memory: 18552.12 MB, Temperature: 32°C, Power: 121.13 W, 30 | {'loss': 0.9605, 'grad_norm': 1.3685229204449167, 'learning_rate': 2.5e-06, 'epoch': 0.0} 31 | 0%|▌ | 1/248 [00:09<38:40, 9.40s/it] 32 | ``` 33 | 34 | ### Script Parameter Explanation 35 | 36 | We provide detailed documentation explaining all parameters and variables used in our training launch scripts. This includes environment variables, calculated variables, torchrun launcher arguments, and training script parameters.
37 | 38 | For comprehensive information, please refer to our [Script Arguments Documentation](./explain_script_arguments.md). 39 | 40 | ### Notes 41 | If you use internal job watchers to auto-cancel failing runs, ensure your training scripts do not print ignorable exceptions with a full traceback; otherwise watchers may incorrectly terminate jobs. 42 | 43 | 44 | 🎉 **Happy Training!** -------------------------------------------------------------------------------- /Eagle/lmms_eval/api/filter.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | 4 | from lmms_eval.api.instance import Instance 5 | from datasets import Dataset 6 | 7 | 8 | class Filter: 9 | """ 10 | Filter classes operate on a per-task level. 11 | They take all model outputs (`instance.resps` for all `task.instances`) 12 | across all instances of a task, and perform operations. 13 | In a single run, one can configure any number of separate filters or lists of filters. 14 | 15 | """ 16 | 17 | def __init__(self, *args, **kwargs) -> None: 18 | """ 19 | Can define custom behavior here, if an individual instantiation of a Filter class should have state. 20 | """ 21 | 22 | def apply(self, resps, docs): 23 | """ 24 | Defines the operation to perform on a list of the `inst.resps` properties of `Instance` objects. 25 | Should return the list of (filtered) response lists *in the same order as they were input*, e.g. 26 | if passed in [<inst.resps for instance 0>, <inst.resps for instance 1>] it should return 27 | [<filtered resps for instance 0>, <filtered resps for instance 1>] 28 | """ 29 | return resps 30 | 31 | 32 | @dataclass 33 | class FilterEnsemble: 34 | """ 35 | FilterEnsemble creates a pipeline applying multiple filters. 36 | Its intended usage is to stack multiple post-processing steps in order. 37 | `task.apply_filters` should use a list of FilterEnsemble classes that it stores, to apply each 38 | pipeline separately. 39 | """ 40 | 41 | name: str 42 | filters: List[Filter] 43 | 44 | def apply(self, instances: List[Instance], docs: List[Dataset]) -> None: 45 | resps = [inst.resps for inst in instances] # operate just on the model responses 46 | for f in self.filters: 47 | # apply filters in sequence 48 | resps = f.apply(resps, docs) 49 | 50 | # add the end results after filtering to filtered_requests of their respective source instances. 51 | # has key `self.name`: each FilterEnsemble applied in a given run should use a different name. 52 | for inst, resp in zip(instances, resps): 53 | inst.filtered_resps[self.name] = resp 54 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA CORPORATION & AFFILIATES 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | # 17 | # Portions of this file are derived from the LLaVA project 18 | # (https://github.com/haotian-liu/LLaVA), licensed under the 19 | # Apache License, Version 2.0. 20 | # 21 | # Modifications © 2024 NVIDIA CORPORATION & AFFILIATES, licensed under 22 | # the Apache License, Version 2.0. 23 | # 24 | # -------------------------------------------------------- 25 | # LLaVA 26 | # Copyright (c) 2023 Haotian Liu 27 | # Licensed under the Apache License, Version 2.0 28 | # -------------------------------------------------------- 29 | 30 | import os 31 | from .clip_encoder import CLIPVisionTower 32 | from .multi_backbone_channel_concatenation_encoder import MultiBackboneChannelConcatenationVisionTower 33 | 34 | def build_vision_tower(vision_tower_cfg, **kwargs): 35 | vision_tower = getattr(vision_tower_cfg, 'mm_vision_tower', getattr(vision_tower_cfg, 'vision_tower', None)) 36 | 37 | if "clip" in vision_tower and vision_tower.startswith("openai"): 38 | is_absolute_path_exists = os.path.exists(vision_tower) 39 | if is_absolute_path_exists or vision_tower.startswith("openai") or vision_tower.startswith("laion") or "ShareGPT4V" in vision_tower: 40 | return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs) 41 | raise ValueError(f'Unknown vision tower: {vision_tower}') 42 | 43 | elif ";" in vision_tower: 44 | return MultiBackboneChannelConcatenationVisionTower(vision_tower, args=vision_tower_cfg) 45 | 46 | raise ValueError(f'Unknown vision tower: {vision_tower}') 47 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/sp_utils/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import math 3 | 4 | import torch.distributed as dist 5 | 6 | from .comm import (all_to_all, gather_forward_split_backward, 7 | split_forward_gather_backward) 8 | from .globals import get_pg_manager 9 | 10 | 11 | def pre_process_for_sequence_parallel_attn(query_states, 12 | key_states, 13 | value_states, 14 | scatter_dim=2, 15 | gather_dim=1): 16 | b, s_div_sp, h, d = query_states.shape 17 | sp = get_pg_manager().ulysses_sequence_parallel_world_size 18 | 19 | 20 | # (b, s_div_sp, insp*h, d/insp) -> (b, s, insp*h/sp, d/insp) 21 | sequence_parallel_group = get_pg_manager().ulysses_sequence_parallel_group 22 | query_states = all_to_all( 23 | query_states, 24 | sequence_parallel_group, 25 | scatter_dim=scatter_dim, 26 | gather_dim=gather_dim) 27 | key_states = all_to_all( 28 | key_states, 29 | sequence_parallel_group, 30 | scatter_dim=scatter_dim, 31 | gather_dim=gather_dim) 32 | value_states = all_to_all( 33 | value_states, 34 | sequence_parallel_group, 35 | scatter_dim=scatter_dim, 36 | gather_dim=gather_dim) 37 | 38 | 39 | return query_states, key_states, value_states 40 | 41 | 42 | def post_process_for_sequence_parallel_attn(attn_output, 43 | scatter_dim=1, 44 | gather_dim=2): 45 | sp = get_pg_manager().ulysses_sequence_parallel_world_size 46 | # insp = get_inner_sequence_parallel_world_size() 47 | b, s, h_mul_insp_div_sp, d = attn_output.shape 48 | h = h_mul_insp_div_sp * sp 49 | s_div_sp = s // sp 50 | 51 | 52 | # (b, s, insp*h/sp, d/insp) -> (b, s_div_sp, insp*h, d/insp) 53 | sequence_parallel_group = get_pg_manager().ulysses_sequence_parallel_group 54 | output = all_to_all( 55 | attn_output, 56 | sequence_parallel_group, 57 | scatter_dim=scatter_dim, 58 | gather_dim=gather_dim) 59 | 60 | return output 61 | 62 | -------------------------------------------------------------------------------- /Eagle/eagle/model/consolidate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA CORPORATION & AFFILIATES 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | # 17 | # Portions of this file are derived from the LLaVA project 18 | # (https://github.com/haotian-liu/LLaVA), licensed under the 19 | # Apache License, Version 2.0. 20 | # 21 | # Modifications © 2024 NVIDIA CORPORATION & AFFILIATES, licensed under 22 | # the Apache License, Version 2.0. 
23 | # 24 | # -------------------------------------------------------- 25 | # LLaVA 26 | # Copyright (c) 2023 Haotian Liu 27 | # Licensed under the Apache License, Version 2.0 28 | # -------------------------------------------------------- 29 | 30 | 31 | """ 32 | Usage: 33 | python3 -m eagle.model.consolidate --src ~/model_weights/eagle-7b --dst ~/model_weights/eagle-7b_consolidate 34 | """ 35 | import argparse 36 | 37 | import torch 38 | from transformers import AutoTokenizer, AutoModelForCausalLM 39 | from eagle.model import * 40 | from eagle.model.utils import auto_upgrade 41 | 42 | 43 | def consolidate_ckpt(src_path, dst_path): 44 | print("Loading model") 45 | auto_upgrade(src_path) 46 | src_model = AutoModelForCausalLM.from_pretrained(src_path, torch_dtype=torch.float16, low_cpu_mem_usage=True) 47 | src_tokenizer = AutoTokenizer.from_pretrained(src_path, use_fast=False) 48 | src_model.save_pretrained(dst_path) 49 | src_tokenizer.save_pretrained(dst_path) 50 | 51 | 52 | if __name__ == "__main__": 53 | parser = argparse.ArgumentParser() 54 | parser.add_argument("--src", type=str, required=True) 55 | parser.add_argument("--dst", type=str, required=True) 56 | 57 | args = parser.parse_args() 58 | 59 | consolidate_ckpt(args.src, args.dst) 60 | -------------------------------------------------------------------------------- /Eagle/scripts/convert_vqav2_for_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import json 4 | 5 | # for debug 6 | import sys 7 | sys.path.append(os.getcwd()) 8 | 9 | from llava.eval.m4c_evaluator import EvalAIAnswerProcessor 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--src', type=str, required=True) 15 | parser.add_argument('--save_path', type=str, required=True) 16 | parser.add_argument('--dir', type=str, default="./playground/data/eval/vqav2") 17 | parser.add_argument('--ckpt', type=str, required=True) 18 | parser.add_argument('--split', type=str, required=True) 19 | return parser.parse_args() 20 | 21 | 22 | if __name__ == '__main__': 23 | 24 | args = parse_args() 25 | 26 | # src = os.path.join(args.dir, 'answers', args.split, args.ckpt, 'merge.jsonl') 27 | src = args.src 28 | test_split = os.path.join(args.dir, 'llava_vqav2_mscoco_test2015.jsonl') 29 | # dst = os.path.join(args.dir, 'answers_upload', args.split, f'vqav2_test_{args.ckpt}.json') 30 | dst = args.save_path 31 | os.makedirs(os.path.dirname(dst), exist_ok=True) 32 | 33 | results = [] 34 | error_line = 0 35 | for line_idx, line in enumerate(open(src)): 36 | try: 37 | results.append(json.loads(line)) 38 | except json.JSONDecodeError: 39 | error_line += 1 40 | 41 | results = {x['question_id']: x['text'] for x in results} 42 | test_split = [json.loads(line) for line in open(test_split)] 43 | split_ids = set([x['question_id'] for x in test_split]) 44 | 45 | print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}') 46 | 47 | all_answers = [] 48 | 49 | answer_processor = EvalAIAnswerProcessor() 50 | 51 | for x in test_split: 52 | if x['question_id'] not in results: 53 | all_answers.append({ 54 | 'question_id': x['question_id'], 55 | 'answer': '' 56 | }) 57 | else: 58 | all_answers.append({ 59 | 'question_id': x['question_id'], 60 | 'answer': answer_processor(results[x['question_id']]) 61 | }) 62 | 63 | with open(dst, 'w') as f: 64 | json.dump(all_answers, f) 65 | 66 | print(f"successfully saved results to {dst}") 67 |
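As a rough sketch of the record shapes this converter assumes (the ids and answers below are made up): each line of the `--src` file is a JSON object carrying `question_id` and the model prediction under `text`, and the file written to `--save_path` is a single JSON list of `{question_id, answer}` entries covering every question in the official test split.

```python
# Illustrative only: the ids and answers below are placeholders, not real VQAv2 records.
import json

# Each line of the --src file is one prediction record:
src_lines = [
    {"question_id": 1001, "text": "two"},
    {"question_id": 1002, "text": "a red bus"},
]
with open("merge_example.jsonl", "w") as f:
    for rec in src_lines:
        f.write(json.dumps(rec) + "\n")

# The converter writes a single JSON list to --save_path, shaped like:
submission = [{"question_id": rec["question_id"], "answer": rec["text"]} for rec in src_lines]
print(json.dumps(submission, indent=2))
```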
-------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | def options_to_str(options_prompt): 6 | option_prompt_str = "" 7 | for i, option in enumerate(options_prompt): 8 | option_choice = chr(ord("A") + i) 9 | option_prompt_str += f"{option_choice}. {option}\n" 10 | 11 | option_prompt_str = option_prompt_str.rstrip("\n") 12 | return option_prompt_str 13 | 14 | 15 | def doc_to_visual(doc): 16 | image_list = [] 17 | if "query_image" in doc: 18 | image_list.append(doc["query_image"].convert("RGB")) 19 | for i in range(5): 20 | id = f"choice_image_{i}" 21 | if id in doc and doc[id] is not None: 22 | image_list.append(doc[id].convert("RGB")) 23 | assert len(image_list) < 6, "Maximum 5 images allowed for ICON-QA" 24 | return image_list 25 | 26 | 27 | def doc_to_text(doc, model_specific_prompt_kwargs): 28 | question = doc["question"] 29 | ques_type = doc["ques_type"] 30 | options_prompt = [] 31 | 32 | if ques_type == "choose_img": 33 | options_prompt.append("The first image.") 34 | options_prompt.append("The second image.") 35 | 36 | options_str = options_to_str(options_prompt) 37 | full_prompt = f"{model_specific_prompt_kwargs['pre_prompt']}{model_specific_prompt_kwargs['statement']}{model_specific_prompt_kwargs['options_statement'].format(question=question, options=options_str)}" 38 | 39 | elif ques_type == "choose_txt": 40 | choices = doc["choices"].split(",") 41 | for i, choice in enumerate(choices): 42 | options_prompt.append(f"{choice}") 43 | 44 | options_str = options_to_str(options_prompt) 45 | full_prompt = f"{model_specific_prompt_kwargs['pre_prompt']}{model_specific_prompt_kwargs['statement']}{model_specific_prompt_kwargs['options_statement'].format(question=question, options=options_str)}" 46 | 47 | elif ques_type == "fill_in_blank": 48 | full_prompt = f"{model_specific_prompt_kwargs['pre_prompt']}{model_specific_prompt_kwargs['statement']}{model_specific_prompt_kwargs['freeform_statement'].format(question=question)}" 49 | 50 | return full_prompt 51 | 52 | 53 | def test_process_results(doc, results): 54 | pred = results[0] 55 | questionId = doc["question_id"] 56 | answer = doc["answer"] 57 | return {"anls": {"questionId": int(questionId), "answer": answer, "pred_answer": pred}} 58 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench_2/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def seed_doc_to_visual(doc): 5 | return [image.convert("RGB") for image in doc["image"]] 6 | 7 | 8 | def parse_choice_img(choice: str, img_token: str): 9 | if "jpg" in choice or "png" in choice: 10 | return img_token 11 | return choice 12 | 13 | 14 | def seed_doc_to_text(doc, model_specific_kwargs=None): 15 | question = doc["question"] 16 | question.replace("", model_specific_kwargs["img_token"]) 17 | question += "\n" + f"A. {parse_choice_img(doc['choice_a'], model_specific_kwargs['img_token'])}\n" 18 | question += f"B. {parse_choice_img(doc['choice_b'], model_specific_kwargs['img_token'])}\n" 19 | question += f"C. {parse_choice_img(doc['choice_c'], model_specific_kwargs['img_token'])}\n" 20 | question += f"D. 
{parse_choice_img(doc['choice_d'], model_specific_kwargs['img_token'])}" 21 | if doc["data_type"] == "Image Generation": 22 | num_img_in_question = len(doc["data_id"]) - 4 23 | prepend_tokens = [model_specific_kwargs["img_token"]] * num_img_in_question 24 | question = " ".join(prepend_tokens) + "\n" + question 25 | return f"{question}\n{model_specific_kwargs['post_prompt']}" 26 | 27 | 28 | def seed_process_result(doc, result): 29 | pred = result[0].strip() 30 | if len(pred) > 1: 31 | pred = pred[0] 32 | answer = doc["answer"] 33 | data_type = doc["data_type"].split(" ") 34 | data_type = "_".join(data_type) 35 | 36 | return {f"seed_{data_type}": {"pred": pred, "answer": answer, "question_id": doc["question_id"]}, f"seed_all": {"pred": pred, "answer": answer, "question_id": doc["question_id"]}} 37 | 38 | 39 | def seed_aggregation_result(results): 40 | total_count = 0 41 | total_correct = 0 42 | for result in results: 43 | if result["pred"] == result["answer"]: 44 | total_correct += 1 45 | total_count += 1 46 | return total_correct / total_count if total_count != 0 else 0 47 | 48 | 49 | def seed_aggregation_result_all(results): 50 | score = seed_aggregation_result(results) 51 | stored_results = [] 52 | for result in results: 53 | stored_results.append({"question_id": result["question_id"], "prediction": result["pred"]}) 54 | with open("./seed_submission.json", "w") as f: 55 | json.dump(stored_results, f, indent=4) 56 | print("Storing files for seed_submission ...") 57 | 58 | return score 59 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import json 4 | import yaml 5 | import pathlib 6 | import logging 7 | import datetime 8 | import statistics 9 | 10 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 11 | from lmms_eval.tasks._task_utils.vqa_eval_metric import EvalAIAnswerProcessor 12 | 13 | eval_logger = logging.getLogger("lmms-eval") 14 | 15 | 16 | def ok_vqa_doc_to_visual(doc): 17 | return [doc["image"].convert("RGB")] 18 | 19 | 20 | def ok_vqa_process_results(doc, result): 21 | eval_ai_processor = EvalAIAnswerProcessor() 22 | assert len(result) == 1, f"The result should be a list of length 1, but got {len(result)}." 
23 | resAns = eval_ai_processor(result[0]) 24 | accuracy = 0 25 | 26 | if "answers" in doc and doc["answers"] is not None: 27 | gtAcc = [] 28 | 29 | for i in range(len(doc["answers"])): 30 | doc["answers"][i] = eval_ai_processor(doc["answers"][i]) 31 | 32 | for i in range(len(doc["answers"])): 33 | otherGTAns = [doc["answers"][j] for j in range(len(doc["answers"])) if i != j] 34 | matchingAns = [item for item in otherGTAns if item == resAns] 35 | acc = min(1, float(len(matchingAns)) / 3) 36 | gtAcc.append(acc) 37 | if gtAcc: 38 | accuracy = statistics.mean(gtAcc) 39 | else: 40 | accuracy = 0 41 | 42 | return { 43 | "exact_match": accuracy, 44 | "submission": { 45 | "image": f"{doc['question_id']}.jpg", 46 | "answer": resAns, 47 | }, 48 | } 49 | 50 | 51 | def ok_vqa_doc_to_text(doc, model_specific_prompt_kwargs=None): 52 | question = doc["question"] 53 | if model_specific_prompt_kwargs is None: 54 | model_specific_prompt_kwargs = {} 55 | pre_prompt = "" 56 | post_prompt = "" 57 | if "pre_prompt" in model_specific_prompt_kwargs: 58 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 59 | if "post_prompt" in model_specific_prompt_kwargs: 60 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 61 | return f"{pre_prompt}{question}{post_prompt}" 62 | 63 | 64 | def ok_vqa_aggreate_submissions(results, args): 65 | now_date_time = datetime.datetime.now().strftime("%Y-%m%d-%H%M-%S") 66 | file = f"ok_vqa-test-submission-{now_date_time}.json" 67 | path = generate_submission_file(file, args) 68 | with open(path, "w") as f: 69 | json.dump(results, f) 70 | print(f"Submission file saved to {path}") 71 | -------------------------------------------------------------------------------- /Eagle2_5/README.md: -------------------------------------------------------------------------------- 1 | # 🦅 Eagle 2.5 2 | 3 | Eagle 2.5 is a multimodal large model (image/video × text). This repository provides the end-to-end guidance and scripts for the environment setup, data preparation, training, and inference of the Eagle VLM. 4 | 5 | --- 6 | 7 | ## 📚 Quick Start (Onboarding) 8 | 9 | Recommended order: 10 | 11 | 1) Set environment variables → 2) Install → 3) Prepare data → 4) Train → 5) Demo → 6) Inference 12 | 13 | - Onboarding overview: see `./document/0.onboarding.md` 14 | 15 | --- 16 | 17 | ## ⚙️ Installation & Environment 18 | 19 | - Detailed steps and dependencies: `./document/1.installing.md` 20 | - Conda environment (Python 3.10) 21 | - PyTorch and FlashAttention (match your CUDA) 22 | - Install this repo with `pip install -e .` 23 | - Troubleshooting notes (specific Transformers version, OpenCV dependencies, etc.) 
24 | 25 | --- 26 | 27 | ## 📂 Data Preparation (Playground) 28 | 29 | - Directory structure and JSONL/LMDB examples: `./document/2.preparing_playground.md` 30 | - `playground/sft_recipe` (data recipe) 31 | - `playground/sft_jsonl` and `playground/sft_data` (annotations and raw data) 32 | - Example parquet→LMDB conversion scripts are not included in this repo 33 | - Use `shell/prepare.sh` to normalize and generate `.prepare.json` (internal `submit_prepare_job.sh` is not included) 34 | - LMDB reading example and tips: `./document/how_to_use_lmdb_to_read_images.md` 35 | 36 | --- 37 | 38 | ## 💪 Training (Stage-2 / Finetuning) 39 | 40 | - Full training entry points and multinode/multigpu options: `./document/3.training.md` 41 | - Single-node example: `GPUS=8 bash shell/train_stage2.sh 1 work_dirs/eagle2.5_debug` 42 | - Multi-node example (srun/internal submit_job): `PARTITION=xxx GPUS=16 bash shell/train_stage2.sh 2 work_dirs/eagle2.5_multinode` 43 | 44 | --- 45 | 46 | ## ✨ Launching Streamlit Demo 47 | 48 | - Interactive testing of the VLM with UI. Refer to document for more details: `./document/4.streamlit_demo.md` 49 | 50 | --- 51 | 52 | ## 🔮 Inference 53 | 54 | - End-to-end usage and multimodal examples (single/multiple images, single/multiple videos, streaming, batch): `./document/5.inference.md` 55 | - Load with `transformers` `AutoModel`/`AutoProcessor`: `"nvidia/Eagle-2.5-8B"` 56 | - Recommended `torch_dtype=torch.bfloat16`; run `model.generate(...)` on GPU 57 | 58 | --- 59 | 60 | ## License 61 | 62 | - See `LICENSE` and `LICENSE_MODEL` at the repository root. 63 | 64 | --- 65 | 66 | For detailed parameter explanations and launcher script notes, see: `./document/explain_script_arguments.md`. 67 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import json 4 | import yaml 5 | import pathlib 6 | import logging 7 | import datetime 8 | import statistics 9 | 10 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 11 | from lmms_eval.tasks._task_utils.vqa_eval_metric import EvalAIAnswerProcessor 12 | 13 | eval_logger = logging.getLogger("lmms-eval") 14 | 15 | 16 | def vizwiz_vqa_doc_to_visual(doc): 17 | return [doc["image"].convert("RGB")] 18 | 19 | 20 | def vizwiz_vqa_process_results(doc, result): 21 | eval_ai_processor = EvalAIAnswerProcessor() 22 | assert len(result) == 1, f"The result should be a list of length 1, but got {len(result)}." 
23 | resAns = eval_ai_processor(result[0]) 24 | accuracy = 0 25 | 26 | if "answers" in doc and doc["answers"] is not None: 27 | gtAcc = [] 28 | 29 | for i in range(len(doc["answers"])): 30 | doc["answers"][i] = eval_ai_processor(doc["answers"][i]) 31 | 32 | for i in range(len(doc["answers"])): 33 | otherGTAns = [doc["answers"][j] for j in range(len(doc["answers"])) if i != j] 34 | matchingAns = [item for item in otherGTAns if item == resAns] 35 | acc = min(1, float(len(matchingAns)) / 3) 36 | gtAcc.append(acc) 37 | if gtAcc: 38 | accuracy = statistics.mean(gtAcc) 39 | else: 40 | accuracy = 0 41 | 42 | return { 43 | "exact_match": accuracy, 44 | "submission": { 45 | "image": f"{doc['question_id']}.jpg", 46 | "answer": resAns, 47 | }, 48 | } 49 | 50 | 51 | def vizwiz_vqa_doc_to_text(doc, model_specific_prompt_kwargs=None): 52 | if model_specific_prompt_kwargs is None: 53 | model_specific_prompt_kwargs = {} 54 | pre_prompt = "" 55 | post_prompt = "" 56 | if "pre_prompt" in model_specific_prompt_kwargs: 57 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 58 | if "post_prompt" in model_specific_prompt_kwargs: 59 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 60 | text = f"{pre_prompt}{doc['question'].capitalize()}{post_prompt}" 61 | return text 62 | 63 | 64 | def vizwiz_vqa_aggreate_submissions(results, args): 65 | now_date_time = datetime.datetime.now().strftime("%Y-%m%d-%H%M-%S") 66 | submission_file_name = f"vizwiz_vqa-test-submission-{now_date_time}.json" 67 | path = generate_submission_file(submission_file_name, args) 68 | with open(path, "w") as f: 69 | json.dump(results, f) 70 | print(f"Submission file saved to {path}") 71 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/olympiadbench/cn_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import datetime 4 | from lmms_eval.tasks.olympiadbench.olympiadbench_evals import OlympiadBenchEvaluator 5 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 6 | 7 | import logging 8 | eval_logger = logging.getLogger("lmms-eval") 9 | dir_name = os.path.dirname(os.path.abspath(__file__)) 10 | 11 | olympiadbench_evaluator = OlympiadBenchEvaluator() 12 | 13 | def olympiadbench_doc_to_visual(doc): 14 | return [image.convert("RGB") for image in doc["images"]] 15 | 16 | def olympiadbench_doc_to_text(doc): 17 | question = doc["question"] 18 | subject = doc["subfield"] 19 | mul_ans = doc["is_multiple_answer"] 20 | if mul_ans is None: 21 | mul_ans = False 22 | ans_type = doc["answer_type"] 23 | if ans_type == "Need_human_evaluate": 24 | ans_type = "proof based" 25 | 26 | pre_prompt = f"以下是中国{subject}竞赛中的解答题。\n" 27 | 28 | post_prompt = "" 29 | if not mul_ans: 30 | post_prompt += f"答案类型为{ans_type}。\n" 31 | else: 32 | post_prompt += f"题目有多个答案,答案类型均为{ans_type}。\n" 33 | post_prompt += "请根据题目的要求和所提供的信息计算得出答案。解答过程和结果中使用的变量和公式请使用LaTeX格式表示。请在最后以" 34 | if not mul_ans: 35 | post_prompt += '"所以最终答案是\\boxed{答案}。"\n' 36 | else: 37 | post_prompt += '"所以最终答案是\\boxed{用英⽂逗号连接的多个答案}。"\n' 38 | 39 | final_question = pre_prompt + question + '\n' + post_prompt 40 | return final_question 41 | 42 | def olympiadbench_process_results(doc, results): 43 | precision = doc["error"] 44 | is_proving = "TP" in doc["source"] 45 | if precision is None: 46 | precision = 0 47 | prediction = results[0].strip() 48 | 49 | if is_proving: 50 | return { 51 | "submission": prediction 52 | } 53 | else: 54 | prediction = 
prediction.split("所以最终答案是")[-1] 55 | prediction = prediction.replace('"', "").replace("\n", "").replace(" ", "").strip(".").strip("。") 56 | accuracy = olympiadbench_evaluator.judge(prediction, doc["final_answer"][0], precision) 57 | accuracy = int(accuracy) 58 | return { 59 | "exact_match": accuracy 60 | } 61 | 62 | def olympiadbench_aggregate_results(results, args): 63 | now_date_time = datetime.datetime.now().strftime("%Y-%m%d-%H%M-%S") 64 | submission_file_name = f"olympiadbench-test-cn-submission-{now_date_time}.json" 65 | path = generate_submission_file(submission_file_name, args) 66 | with open(path, "w") as f: 67 | json.dump(results, f, ensure_ascii=False) 68 | print(f"Submission file saved to {path}") 69 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/chartqa/utils.py: -------------------------------------------------------------------------------- 1 | def chartqa_doc_to_visual(doc): 2 | return [doc["image"].convert("RGB")] 3 | 4 | 5 | def chartqa_doc_to_text(doc, model_specific_prompt_kwargs): 6 | question = doc["question"] 7 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 8 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 9 | return f"{pre_prompt}{question}{post_prompt}" 10 | 11 | 12 | def chartqa_process_results(doc, results): 13 | pred = results[0] 14 | type = doc["type"] 15 | score = relaxed_correctness(pred, doc["answer"]) 16 | score = 1.0 if score else 0.0 17 | return_dict = {"relaxed_overall": score} 18 | if type == "human_test": 19 | return_dict["relaxed_human_split"] = score 20 | else: 21 | return_dict["relaxed_augmented_split"] = score 22 | return return_dict 23 | 24 | 25 | def relaxed_correctness(prediction, target, max_relative_change: float = 0.05) -> bool: 26 | """Calculates relaxed correctness. 27 | 28 | The correctness tolerates certain error ratio defined by max_relative_change. 29 | See https://arxiv.org/pdf/2203.10244.pdf, end of section 5.1: 30 | “Following Methani et al. (2020), we use a relaxed accuracy measure for the 31 | numeric answers to allow a minor inaccuracy that may result from the automatic 32 | data extraction process. We consider an answer to be correct if it is within 33 | 5% of the gold answer. For non-numeric answers, we still need an exact match 34 | to consider an answer to be correct.” 35 | 36 | This function is taken from https://github.com/QwenLM/Qwen-VL/blob/34b4c0ee7b07726371b960911f249fe61b362ca3/eval_mm/evaluate_vqa.py#L113 37 | Args: 38 | target: Target string. 39 | prediction: Predicted string. 40 | max_relative_change: Maximum relative change. 41 | 42 | Returns: 43 | Whether the prediction was correct given the specified tolerance. 44 | """ 45 | 46 | def _to_float(text: str): 47 | try: 48 | if text.endswith("%"): 49 | # Convert percentages to floats.
50 | return float(text.rstrip("%")) / 100.0 51 | else: 52 | return float(text) 53 | except ValueError: 54 | return None 55 | 56 | prediction_float = _to_float(prediction) 57 | target_float = _to_float(target) 58 | if prediction_float is not None and target_float: 59 | relative_change = abs(prediction_float - target_float) / abs(target_float) 60 | return relative_change <= max_relative_change 61 | else: 62 | return prediction.lower() == target.lower() 63 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import json 4 | import yaml 5 | import pathlib 6 | import logging 7 | import datetime 8 | import statistics 9 | 10 | from lmms_eval.tasks._task_utils.vqa_eval_metric import EvalAIAnswerProcessor 11 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 12 | 13 | eval_logger = logging.getLogger("lmms-eval") 14 | 15 | 16 | def textvqa_doc_to_visual(doc): 17 | return [doc["image"].convert("RGB")] 18 | 19 | 20 | def textvqa_process_results(doc, result): 21 | eval_ai_processor = EvalAIAnswerProcessor() 22 | assert len(result) == 1, f"The result should be a list of length 1, but got {len(result)}." 23 | resAns = eval_ai_processor(result[0]) 24 | accuracy = 0 25 | 26 | if "answers" in doc and doc["answers"] is not None: 27 | gtAcc = [] 28 | 29 | for i in range(len(doc["answers"])): 30 | doc["answers"][i] = eval_ai_processor(doc["answers"][i]) 31 | 32 | for i in range(len(doc["answers"])): 33 | otherGTAns = [doc["answers"][j] for j in range(len(doc["answers"])) if i != j] 34 | matchingAns = [item for item in otherGTAns if item == resAns] 35 | acc = min(1, float(len(matchingAns)) / 3) 36 | gtAcc.append(acc) 37 | accuracy = statistics.mean(gtAcc) 38 | 39 | return { 40 | "exact_match": accuracy, 41 | "submission": { 42 | "question_id": doc["question_id"], 43 | "answer": resAns, 44 | }, 45 | } 46 | 47 | 48 | def textvqa_doc_to_text(doc, model_specific_prompt_kwargs=None): 49 | pre_prompt = "" 50 | post_prompt = "" 51 | ocr_ref = "" 52 | if model_specific_prompt_kwargs: 53 | if "pre_prompt" in model_specific_prompt_kwargs: 54 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 55 | if "post_prompt" in model_specific_prompt_kwargs: 56 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 57 | if "ocr" in model_specific_prompt_kwargs and model_specific_prompt_kwargs["ocr"]: 58 | ocr_ref = f"\nReference OCR token: {', '.join(doc['ocr_tokens'])}" 59 | return f"{pre_prompt}{doc['question'].capitalize()}{ocr_ref}{post_prompt}" 60 | 61 | 62 | def textvqa_aggreate_submissions(results, args): 63 | now_date_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 64 | path = generate_submission_file(f"textvqa_submission_{now_date_time}.json", args) 65 | with open(path, "w") as f: 66 | json.dump(results, f) 67 | # print(f"Submission file saved to {path}") 68 | eval_logger.info(f"Submission file saved to {path}") 69 | --------------------------------------------------------------------------------
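The `min(1, matches / 3)` scoring in `textvqa_process_results` (and in the ok_vqa and vizwiz_vqa variants earlier) is the standard VQA soft accuracy: each annotator's answer is left out in turn, the prediction is matched against the remaining answers, and the per-annotator scores are averaged. A small self-contained example with made-up annotator answers:

```python
# Worked example of the VQA soft-accuracy formula; the annotator answers are made up.
import statistics

def vqa_soft_accuracy(prediction, answers):
    per_annotator = []
    for i in range(len(answers)):
        others = [answers[j] for j in range(len(answers)) if j != i]
        matches = sum(a == prediction for a in others)
        per_annotator.append(min(1.0, matches / 3))
    return statistics.mean(per_annotator)

# 2 of 10 annotators answered "blue": leave-one-out gives 1/3 twice and 2/3 eight times.
answers = ["blue"] * 2 + ["navy"] * 8
print(round(vqa_soft_accuracy("blue", answers), 4))  # 0.6
```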