├── Cheetah ├── LICENSE_Lavis.md ├── cheetah │ ├── __init__.py │ ├── common │ │ ├── __init__.py │ │ ├── config.py │ │ ├── dist_utils.py │ │ ├── gradcam.py │ │ ├── logger.py │ │ ├── optims.py │ │ ├── registry.py │ │ └── utils.py │ ├── configs │ │ ├── default.yaml │ │ └── models │ │ │ ├── cheetah_llama2.yaml │ │ │ └── cheetah_vicuna.yaml │ ├── conversation │ │ ├── __init__.py │ │ ├── conversation.py │ │ └── conversation_llama2.py │ ├── models │ │ ├── Qformer.py │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── blip2.py │ │ ├── blip2_outputs.py │ │ ├── cheetah_llama2.py │ │ ├── cheetah_vicuna.py │ │ ├── eva_vit.py │ │ ├── modeling_llama.py │ │ └── modeling_llama2.py │ └── processors │ │ ├── __init__.py │ │ ├── base_processor.py │ │ ├── blip_processors.py │ │ └── randaugment.py ├── eval_configs │ ├── cheetah_eval_llama2.yaml │ └── cheetah_eval_vicuna.yaml ├── examples │ ├── 1.jpg │ ├── 10.jpg │ ├── 11.jpg │ ├── 12.jpg │ ├── 13.jpg │ ├── 14.jpg │ ├── 15.jpg │ ├── 16.jpeg │ ├── 17.jpeg │ ├── 18.png │ ├── 19.png │ ├── 2.jpg │ ├── 20.jpg │ ├── 21.jpg │ ├── 22.png │ ├── 3.jpg │ ├── 4.jpg │ ├── 5.jpg │ ├── 6.jpg │ ├── 7.jpg │ ├── 8.jpg │ └── 9.jpg ├── i4_cheetah_llama2.py ├── requirement.txt ├── test_cheetah_llama2.py └── test_cheetah_vicuna.py ├── DEMON Benchmark ├── DEMON-Core │ ├── Knowledge Grounded QA │ │ ├── MultiModalQA │ │ │ └── README.md │ │ ├── TQA │ │ │ └── README.md │ │ └── WebQA │ │ │ └── README.md │ ├── Multi-Image Reasoning │ │ ├── Fashion200K │ │ │ └── README.md │ │ ├── MIT-States_PropertyCoherence │ │ │ └── README.md │ │ ├── MIT-States_StateCoherence │ │ │ └── README.md │ │ ├── NLVR2 │ │ │ └── README.md │ │ ├── RecipeQA_ImageCoherence │ │ │ └── README.md │ │ ├── VISION │ │ │ └── README.md │ │ ├── VizWiz │ │ │ └── README.md │ │ └── nuScenes │ │ │ └── README.md │ ├── Multi-Modal Cloze │ │ ├── COMICS_Dialogue │ │ │ └── README.md │ │ ├── COMICS_Panel │ │ │ └── README.md │ │ ├── RecipeQA_TextCloze │ │ │ └── README.md │ │ └── RecipeQA_VisualCloze │ │ │ └── README.md │ ├── Multi-Modal Dialogue │ │ ├── ALFRED │ │ │ └── README.md │ │ └── MMCoQA │ │ │ └── README.md │ ├── Text-Rich Images QA │ │ ├── DocVQA │ │ │ └── README.md │ │ ├── OCR-VQA │ │ │ └── README.md │ │ └── SlideVQA │ │ │ └── README.md │ ├── Visual Relation Inference │ │ ├── Birds-to-Words │ │ │ └── README.md │ │ ├── CLEVR-Change │ │ │ └── README.md │ │ ├── IEdit │ │ │ └── README.md │ │ └── Spot-the-Diff │ │ │ └── README.md │ └── Visual Storytelling │ │ ├── AESOP │ │ └── README.md │ │ ├── DiDeMoSV │ │ └── README.md │ │ ├── FlintstonesSV │ │ └── README.md │ │ ├── PororoSV │ │ └── README.md │ │ └── VIST │ │ └── README.md ├── DEMON-Full │ ├── Knowledge Grounded QA │ │ ├── ManyModalQA │ │ │ └── README.md │ │ ├── MultiModalQA │ │ │ └── README.md │ │ ├── TQA │ │ │ └── README.md │ │ └── WebQA │ │ │ └── README.md │ ├── Multi-Image Reasoning │ │ ├── Fashion200K │ │ │ └── README.md │ │ ├── Gibson │ │ │ └── README.md │ │ ├── MIT-States_PropertyCoherence │ │ │ └── README.md │ │ ├── MIT-States_StateCoherence │ │ │ └── README.md │ │ ├── NLVR2 │ │ │ └── README.md │ │ ├── RecipeQA_ImageCoherence │ │ │ └── README.md │ │ ├── VISION │ │ │ └── README.md │ │ ├── VizWiz │ │ │ └── README.md │ │ └── nuScenes │ │ │ └── README.md │ ├── Multi-Modal Cloze │ │ ├── COMICS_Dialogue │ │ │ └── README.md │ │ ├── COMICS_Panel │ │ │ └── README.md │ │ ├── RecipeQA_TextCloze │ │ │ └── README.md │ │ └── RecipeQA_VisualCloze │ │ │ └── README.md │ ├── Multi-Modal Dialogue │ │ ├── ALFRED │ │ │ └── README.md │ │ └── MMCoQA │ │ │ └── README.md │ ├── Text-Rich Images QA │ │ ├── DocVQA │ │ │ └── README.md │ │ ├── OCR-VQA │ │ │ └── README.md │ │ └── SlideVQA │ │ │ └── README.md │ ├── Visual Relation Inference │ │ ├── Birds-to-Words │ │ │ └── README.md │ │ ├── CLEVR-Change │ │ │ └── README.md │ │ ├── IEdit │ │ │ └── README.md │ │ └── Spot-the-Diff │ │ │ └── README.md │ └── Visual Storytelling │ │ ├── AESOP │ │ └── README.md │ │ ├── DiDeMoSV │ │ └── README.md │ │ ├── FlintstonesSV │ │ └── README.md │ │ ├── PororoSV │ │ └── README.md │ │ └── VIST │ │ └── README.md ├── README.md ├── demo.svg └── scripts │ ├── evaluate.py │ ├── i4_score.py │ └── i4_scripts.sh ├── README.md ├── README_zh.md ├── figs ├── MME_results.png ├── case.svg ├── case_zh.svg ├── cheetah_logo.png ├── demo.svg ├── framework.png └── vpgc-model.svg └── license.md /Cheetah/LICENSE_Lavis.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/LICENSE_Lavis.md -------------------------------------------------------------------------------- /Cheetah/cheetah/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/__init__.py -------------------------------------------------------------------------------- /Cheetah/cheetah/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Cheetah/cheetah/common/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/common/config.py -------------------------------------------------------------------------------- /Cheetah/cheetah/common/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/common/dist_utils.py -------------------------------------------------------------------------------- /Cheetah/cheetah/common/gradcam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/common/gradcam.py -------------------------------------------------------------------------------- /Cheetah/cheetah/common/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/common/logger.py -------------------------------------------------------------------------------- /Cheetah/cheetah/common/optims.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/common/optims.py -------------------------------------------------------------------------------- /Cheetah/cheetah/common/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/common/registry.py -------------------------------------------------------------------------------- /Cheetah/cheetah/common/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/common/utils.py -------------------------------------------------------------------------------- /Cheetah/cheetah/configs/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/configs/default.yaml -------------------------------------------------------------------------------- /Cheetah/cheetah/configs/models/cheetah_llama2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/configs/models/cheetah_llama2.yaml -------------------------------------------------------------------------------- /Cheetah/cheetah/configs/models/cheetah_vicuna.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/configs/models/cheetah_vicuna.yaml -------------------------------------------------------------------------------- /Cheetah/cheetah/conversation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Cheetah/cheetah/conversation/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/conversation/conversation.py -------------------------------------------------------------------------------- /Cheetah/cheetah/conversation/conversation_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/conversation/conversation_llama2.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/Qformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/Qformer.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/__init__.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/base_model.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/blip2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/blip2.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/blip2_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/blip2_outputs.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/cheetah_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/cheetah_llama2.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/cheetah_vicuna.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/cheetah_vicuna.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/eva_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/eva_vit.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/modeling_llama.py -------------------------------------------------------------------------------- /Cheetah/cheetah/models/modeling_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/models/modeling_llama2.py -------------------------------------------------------------------------------- /Cheetah/cheetah/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/processors/__init__.py -------------------------------------------------------------------------------- /Cheetah/cheetah/processors/base_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/processors/base_processor.py -------------------------------------------------------------------------------- /Cheetah/cheetah/processors/blip_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/processors/blip_processors.py -------------------------------------------------------------------------------- /Cheetah/cheetah/processors/randaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/cheetah/processors/randaugment.py -------------------------------------------------------------------------------- /Cheetah/eval_configs/cheetah_eval_llama2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/eval_configs/cheetah_eval_llama2.yaml -------------------------------------------------------------------------------- /Cheetah/eval_configs/cheetah_eval_vicuna.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/eval_configs/cheetah_eval_vicuna.yaml -------------------------------------------------------------------------------- /Cheetah/examples/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/1.jpg -------------------------------------------------------------------------------- /Cheetah/examples/10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/10.jpg -------------------------------------------------------------------------------- /Cheetah/examples/11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/11.jpg -------------------------------------------------------------------------------- /Cheetah/examples/12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/12.jpg -------------------------------------------------------------------------------- /Cheetah/examples/13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/13.jpg -------------------------------------------------------------------------------- /Cheetah/examples/14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/14.jpg -------------------------------------------------------------------------------- /Cheetah/examples/15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/15.jpg -------------------------------------------------------------------------------- /Cheetah/examples/16.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/16.jpeg -------------------------------------------------------------------------------- /Cheetah/examples/17.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/17.jpeg -------------------------------------------------------------------------------- /Cheetah/examples/18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/18.png -------------------------------------------------------------------------------- /Cheetah/examples/19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/19.png -------------------------------------------------------------------------------- /Cheetah/examples/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/2.jpg -------------------------------------------------------------------------------- /Cheetah/examples/20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/20.jpg -------------------------------------------------------------------------------- /Cheetah/examples/21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/21.jpg -------------------------------------------------------------------------------- /Cheetah/examples/22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/22.png -------------------------------------------------------------------------------- /Cheetah/examples/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/3.jpg -------------------------------------------------------------------------------- /Cheetah/examples/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/4.jpg -------------------------------------------------------------------------------- /Cheetah/examples/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/5.jpg -------------------------------------------------------------------------------- /Cheetah/examples/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/6.jpg -------------------------------------------------------------------------------- /Cheetah/examples/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/7.jpg -------------------------------------------------------------------------------- /Cheetah/examples/8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/8.jpg -------------------------------------------------------------------------------- /Cheetah/examples/9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/examples/9.jpg -------------------------------------------------------------------------------- /Cheetah/i4_cheetah_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/i4_cheetah_llama2.py -------------------------------------------------------------------------------- /Cheetah/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/requirement.txt -------------------------------------------------------------------------------- /Cheetah/test_cheetah_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/test_cheetah_llama2.py -------------------------------------------------------------------------------- /Cheetah/test_cheetah_vicuna.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/Cheetah/test_cheetah_vicuna.py -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Knowledge Grounded QA/MultiModalQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Knowledge Grounded QA/MultiModalQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Knowledge Grounded QA/TQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Knowledge Grounded QA/TQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Knowledge Grounded QA/WebQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Knowledge Grounded QA/WebQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/Fashion200K/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/Fashion200K/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/MIT-States_PropertyCoherence/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/MIT-States_PropertyCoherence/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/MIT-States_StateCoherence/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/MIT-States_StateCoherence/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/NLVR2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/NLVR2/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/RecipeQA_ImageCoherence/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/RecipeQA_ImageCoherence/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/VISION/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/VISION/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/VizWiz/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/VizWiz/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/nuScenes/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Image Reasoning/nuScenes/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Modal Cloze/COMICS_Dialogue/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Modal Cloze/COMICS_Dialogue/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Modal Cloze/COMICS_Panel/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Modal Cloze/COMICS_Panel/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Modal Cloze/RecipeQA_TextCloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Modal Cloze/RecipeQA_TextCloze/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Modal Cloze/RecipeQA_VisualCloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Modal Cloze/RecipeQA_VisualCloze/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Modal Dialogue/ALFRED/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Modal Dialogue/ALFRED/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Multi-Modal Dialogue/MMCoQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Multi-Modal Dialogue/MMCoQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Text-Rich Images QA/DocVQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Text-Rich Images QA/DocVQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Text-Rich Images QA/OCR-VQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Text-Rich Images QA/OCR-VQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Text-Rich Images QA/SlideVQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Text-Rich Images QA/SlideVQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Relation Inference/Birds-to-Words/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Relation Inference/Birds-to-Words/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Relation Inference/CLEVR-Change/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Relation Inference/CLEVR-Change/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Relation Inference/IEdit/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Relation Inference/IEdit/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Relation Inference/Spot-the-Diff/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Relation Inference/Spot-the-Diff/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Storytelling/AESOP/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Storytelling/AESOP/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Storytelling/DiDeMoSV/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Storytelling/DiDeMoSV/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Storytelling/FlintstonesSV/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Storytelling/FlintstonesSV/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Storytelling/PororoSV/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Storytelling/PororoSV/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Core/Visual Storytelling/VIST/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Core/Visual Storytelling/VIST/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Knowledge Grounded QA/ManyModalQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Knowledge Grounded QA/ManyModalQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Knowledge Grounded QA/MultiModalQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Knowledge Grounded QA/MultiModalQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Knowledge Grounded QA/TQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Knowledge Grounded QA/TQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Knowledge Grounded QA/WebQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Knowledge Grounded QA/WebQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/Fashion200K/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/Fashion200K/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/Gibson/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/Gibson/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/MIT-States_PropertyCoherence/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/MIT-States_PropertyCoherence/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/MIT-States_StateCoherence/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/MIT-States_StateCoherence/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/NLVR2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/NLVR2/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/RecipeQA_ImageCoherence/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/RecipeQA_ImageCoherence/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/VISION/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/VISION/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/VizWiz/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/VizWiz/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/nuScenes/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Image Reasoning/nuScenes/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Modal Cloze/COMICS_Dialogue/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Modal Cloze/COMICS_Dialogue/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Modal Cloze/COMICS_Panel/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Modal Cloze/COMICS_Panel/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Modal Cloze/RecipeQA_TextCloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Modal Cloze/RecipeQA_TextCloze/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Modal Cloze/RecipeQA_VisualCloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Modal Cloze/RecipeQA_VisualCloze/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Modal Dialogue/ALFRED/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Modal Dialogue/ALFRED/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Multi-Modal Dialogue/MMCoQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Multi-Modal Dialogue/MMCoQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Text-Rich Images QA/DocVQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Text-Rich Images QA/DocVQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Text-Rich Images QA/OCR-VQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Text-Rich Images QA/OCR-VQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Text-Rich Images QA/SlideVQA/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Text-Rich Images QA/SlideVQA/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Relation Inference/Birds-to-Words/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Relation Inference/Birds-to-Words/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Relation Inference/CLEVR-Change/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Relation Inference/CLEVR-Change/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Relation Inference/IEdit/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Relation Inference/IEdit/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Relation Inference/Spot-the-Diff/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Relation Inference/Spot-the-Diff/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Storytelling/AESOP/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Storytelling/AESOP/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Storytelling/DiDeMoSV/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Storytelling/DiDeMoSV/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Storytelling/FlintstonesSV/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Storytelling/FlintstonesSV/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Storytelling/PororoSV/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Storytelling/PororoSV/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/DEMON-Full/Visual Storytelling/VIST/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/DEMON-Full/Visual Storytelling/VIST/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/README.md -------------------------------------------------------------------------------- /DEMON Benchmark/demo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/demo.svg -------------------------------------------------------------------------------- /DEMON Benchmark/scripts/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/scripts/evaluate.py -------------------------------------------------------------------------------- /DEMON Benchmark/scripts/i4_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/scripts/i4_score.py -------------------------------------------------------------------------------- /DEMON Benchmark/scripts/i4_scripts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/DEMON Benchmark/scripts/i4_scripts.sh -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/README.md -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/README_zh.md -------------------------------------------------------------------------------- /figs/MME_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/figs/MME_results.png -------------------------------------------------------------------------------- /figs/case.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/figs/case.svg -------------------------------------------------------------------------------- /figs/case_zh.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/figs/case_zh.svg -------------------------------------------------------------------------------- /figs/cheetah_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/figs/cheetah_logo.png -------------------------------------------------------------------------------- /figs/demo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/figs/demo.svg -------------------------------------------------------------------------------- /figs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/figs/framework.png -------------------------------------------------------------------------------- /figs/vpgc-model.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/figs/vpgc-model.svg -------------------------------------------------------------------------------- /license.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DCDmllm/Cheetah/HEAD/license.md --------------------------------------------------------------------------------