├── .devcontainer ├── Dockerfile ├── devcontainer.env ├── devcontainer.json └── postCreateCommand.sh ├── .dockerignore ├── .editorconfig ├── .gitattributes ├── .github └── ISSUE_TEMPLATE │ ├── 1-usage.yaml │ ├── 2-feature-request.yaml │ ├── 3-question.yaml │ └── 4-discussion.yaml ├── .gitignore ├── LICENSE ├── README.md ├── cog.yaml ├── docs ├── Customize_Component.md ├── Data.md ├── Evaluation.md ├── Evaluation_image.md ├── Finetune_Custom_Data.md ├── Intel.md ├── MODEL_ZOO.md ├── Windows.md ├── macOS.md └── study_llm_backbone.md ├── images ├── all-model-compare.png ├── demo_cli.gif ├── llava-compare.png ├── llava_example_cmp.png └── vip-llava_arch.png ├── llava ├── __init__.py ├── constants.py ├── conversation.py ├── eval │ ├── eval_gpt_review.py │ ├── eval_gpt_review_bench.py │ ├── eval_gpt_review_visual.py │ ├── eval_pope.py │ ├── eval_science_qa.py │ ├── eval_science_qa_gpt4.py │ ├── eval_science_qa_gpt4_requery.py │ ├── eval_textvqa.py │ ├── generate_webpage_data_from_table.py │ ├── m4c_evaluator.py │ ├── model_qa.py │ ├── model_vqa.py │ ├── model_vqa_loader.py │ ├── model_vqa_loader_vip.py │ ├── model_vqa_mmbench.py │ ├── model_vqa_qbench.py │ ├── model_vqa_science.py │ ├── qa_baseline_gpt35.py │ ├── run_llava.py │ ├── summarize_gpt_review.py │ ├── table │ │ ├── answer │ │ │ ├── answer_alpaca-13b.jsonl │ │ │ ├── answer_bard.jsonl │ │ │ ├── answer_gpt35.jsonl │ │ │ ├── answer_llama-13b.jsonl │ │ │ └── answer_vicuna-13b.jsonl │ │ ├── caps_boxes_coco2014_val_80.jsonl │ │ ├── model.jsonl │ │ ├── prompt.jsonl │ │ ├── question.jsonl │ │ ├── results │ │ │ ├── test_sqa_llava_13b_v0.json │ │ │ └── test_sqa_llava_lcs_558k_sqa_12e_vicuna_v1_3_13b.json │ │ ├── review │ │ │ ├── review_alpaca-13b_vicuna-13b.jsonl │ │ │ ├── review_bard_vicuna-13b.jsonl │ │ │ ├── review_gpt35_vicuna-13b.jsonl │ │ │ └── review_llama-13b_vicuna-13b.jsonl │ │ ├── reviewer.jsonl │ │ └── rule.json │ └── webpage │ │ ├── figures │ │ ├── alpaca.png │ │ ├── bard.jpg │ │ ├── chatgpt.svg │ │ ├── llama.jpg │ │ ├── swords_FILL0_wght300_GRAD0_opsz48.svg │ │ └── vicuna.jpeg │ │ ├── index.html │ │ ├── script.js │ │ └── styles.css ├── mm_utils.py ├── model │ ├── __init__.py │ ├── apply_delta.py │ ├── builder.py │ ├── consolidate.py │ ├── language_model │ │ ├── configuration_phi3.py │ │ ├── llava_llama.py │ │ ├── llava_mpt.py │ │ ├── llava_phi3.py │ │ └── modeling_phi3.py │ ├── llava_arch.py │ ├── make_delta.py │ ├── multimodal_encoder │ │ ├── builder.py │ │ ├── clip_4layer_encoder.py │ │ └── clip_encoder.py │ ├── multimodal_projector │ │ └── builder.py │ └── utils.py ├── serve │ ├── __init__.py │ ├── cli.py │ ├── cli_vip.py │ ├── controller.py │ ├── examples │ │ ├── extreme_ironing.jpg │ │ └── waterview.jpg │ ├── gradio_web_server.py │ ├── model_worker.py │ ├── register_worker.py │ └── test_message.py ├── train │ ├── llama_flash_attn_monkey_patch.py │ ├── llama_xformers_attn_monkey_patch.py │ ├── llava_trainer.py │ ├── train.py │ ├── train_mem.py │ └── train_xformers.py ├── utils.py ├── visual_prompt_generator.py └── visual_prompt_organizer.py ├── playground └── data │ ├── eval │ └── vip-bench-example-results │ │ └── vip-llava-7b-human.json │ └── prompts │ ├── refcocog.text │ └── vg.text ├── predict.py ├── pyproject.toml └── scripts ├── convert_vipbench_for_eval.py ├── eval ├── pointQA.sh ├── v7w.sh ├── vcr_qa.sh ├── vcr_qar.sh ├── vip-bench_evaluator.py └── vipbench.sh ├── finetune_llava_1_5_llama3.sh ├── finetune_llava_1_5_phi3.sh ├── finetune_stage2.sh ├── finetune_stage2_lora.sh ├── finetune_stage3.sh ├── finetune_task.sh ├── finetune_task_lora.sh ├── finetune_vip_llava_llama3_stage2.sh ├── finetune_vip_llava_llama3_stage3.sh ├── finetune_vip_llava_phi3_stage2.sh ├── finetune_vip_llava_phi3_stage3.sh ├── pretrain.sh ├── pretrain_llava_1_5_llama3.sh ├── pretrain_llava_1_5_phi3.sh ├── pretrain_vip_llava_llama3.sh ├── pretrain_vip_llava_phi3.sh ├── v1_5 └── eval │ ├── gqa.sh │ ├── llavabench.sh │ ├── mmbench.sh │ ├── mmbench_cn.sh │ ├── mme.sh │ ├── mmvet.sh │ ├── pope.sh │ ├── qbench.sh │ ├── qbench_zh.sh │ ├── seed-img.sh │ ├── seed-process-anno.py │ ├── seed.sh │ ├── sqa.sh │ ├── textvqa.sh │ ├── vizwiz.sh │ └── vqav2.sh ├── zero2.json ├── zero3.json └── zero3_offload.json /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.devcontainer/Dockerfile -------------------------------------------------------------------------------- /.devcontainer/devcontainer.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.devcontainer/devcontainer.env -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.devcontainer/devcontainer.json -------------------------------------------------------------------------------- /.devcontainer/postCreateCommand.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.devcontainer/postCreateCommand.sh -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.dockerignore -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.editorconfig -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.gitattributes -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/1-usage.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.github/ISSUE_TEMPLATE/1-usage.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/2-feature-request.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.github/ISSUE_TEMPLATE/2-feature-request.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/3-question.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.github/ISSUE_TEMPLATE/3-question.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/4-discussion.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.github/ISSUE_TEMPLATE/4-discussion.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/README.md -------------------------------------------------------------------------------- /cog.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/cog.yaml -------------------------------------------------------------------------------- /docs/Customize_Component.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/Customize_Component.md -------------------------------------------------------------------------------- /docs/Data.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/Data.md -------------------------------------------------------------------------------- /docs/Evaluation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/Evaluation.md -------------------------------------------------------------------------------- /docs/Evaluation_image.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/Evaluation_image.md -------------------------------------------------------------------------------- /docs/Finetune_Custom_Data.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/Finetune_Custom_Data.md -------------------------------------------------------------------------------- /docs/Intel.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/Intel.md -------------------------------------------------------------------------------- /docs/MODEL_ZOO.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/MODEL_ZOO.md -------------------------------------------------------------------------------- /docs/Windows.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/Windows.md -------------------------------------------------------------------------------- /docs/macOS.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/macOS.md -------------------------------------------------------------------------------- /docs/study_llm_backbone.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/docs/study_llm_backbone.md -------------------------------------------------------------------------------- /images/all-model-compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/images/all-model-compare.png -------------------------------------------------------------------------------- /images/demo_cli.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/images/demo_cli.gif -------------------------------------------------------------------------------- /images/llava-compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/images/llava-compare.png -------------------------------------------------------------------------------- /images/llava_example_cmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/images/llava_example_cmp.png -------------------------------------------------------------------------------- /images/vip-llava_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/images/vip-llava_arch.png -------------------------------------------------------------------------------- /llava/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/__init__.py -------------------------------------------------------------------------------- /llava/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/constants.py -------------------------------------------------------------------------------- /llava/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/conversation.py -------------------------------------------------------------------------------- /llava/eval/eval_gpt_review.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/eval_gpt_review.py -------------------------------------------------------------------------------- /llava/eval/eval_gpt_review_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/eval_gpt_review_bench.py -------------------------------------------------------------------------------- /llava/eval/eval_gpt_review_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/eval_gpt_review_visual.py -------------------------------------------------------------------------------- /llava/eval/eval_pope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/eval_pope.py -------------------------------------------------------------------------------- /llava/eval/eval_science_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/eval_science_qa.py -------------------------------------------------------------------------------- /llava/eval/eval_science_qa_gpt4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/eval_science_qa_gpt4.py -------------------------------------------------------------------------------- /llava/eval/eval_science_qa_gpt4_requery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/eval_science_qa_gpt4_requery.py -------------------------------------------------------------------------------- /llava/eval/eval_textvqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/eval_textvqa.py -------------------------------------------------------------------------------- /llava/eval/generate_webpage_data_from_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/generate_webpage_data_from_table.py -------------------------------------------------------------------------------- /llava/eval/m4c_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/m4c_evaluator.py -------------------------------------------------------------------------------- /llava/eval/model_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/model_qa.py -------------------------------------------------------------------------------- /llava/eval/model_vqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/model_vqa.py -------------------------------------------------------------------------------- /llava/eval/model_vqa_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/model_vqa_loader.py -------------------------------------------------------------------------------- /llava/eval/model_vqa_loader_vip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/model_vqa_loader_vip.py -------------------------------------------------------------------------------- /llava/eval/model_vqa_mmbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/model_vqa_mmbench.py -------------------------------------------------------------------------------- /llava/eval/model_vqa_qbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/model_vqa_qbench.py -------------------------------------------------------------------------------- /llava/eval/model_vqa_science.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/model_vqa_science.py -------------------------------------------------------------------------------- /llava/eval/qa_baseline_gpt35.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/qa_baseline_gpt35.py -------------------------------------------------------------------------------- /llava/eval/run_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/run_llava.py -------------------------------------------------------------------------------- /llava/eval/summarize_gpt_review.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/summarize_gpt_review.py -------------------------------------------------------------------------------- /llava/eval/table/answer/answer_alpaca-13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/answer/answer_alpaca-13b.jsonl -------------------------------------------------------------------------------- /llava/eval/table/answer/answer_bard.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/answer/answer_bard.jsonl -------------------------------------------------------------------------------- /llava/eval/table/answer/answer_gpt35.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/answer/answer_gpt35.jsonl -------------------------------------------------------------------------------- /llava/eval/table/answer/answer_llama-13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/answer/answer_llama-13b.jsonl -------------------------------------------------------------------------------- /llava/eval/table/answer/answer_vicuna-13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/answer/answer_vicuna-13b.jsonl -------------------------------------------------------------------------------- /llava/eval/table/caps_boxes_coco2014_val_80.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/caps_boxes_coco2014_val_80.jsonl -------------------------------------------------------------------------------- /llava/eval/table/model.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/model.jsonl -------------------------------------------------------------------------------- /llava/eval/table/prompt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/prompt.jsonl -------------------------------------------------------------------------------- /llava/eval/table/question.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/question.jsonl -------------------------------------------------------------------------------- /llava/eval/table/results/test_sqa_llava_13b_v0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/results/test_sqa_llava_13b_v0.json -------------------------------------------------------------------------------- /llava/eval/table/results/test_sqa_llava_lcs_558k_sqa_12e_vicuna_v1_3_13b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/results/test_sqa_llava_lcs_558k_sqa_12e_vicuna_v1_3_13b.json -------------------------------------------------------------------------------- /llava/eval/table/review/review_alpaca-13b_vicuna-13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/review/review_alpaca-13b_vicuna-13b.jsonl -------------------------------------------------------------------------------- /llava/eval/table/review/review_bard_vicuna-13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/review/review_bard_vicuna-13b.jsonl -------------------------------------------------------------------------------- /llava/eval/table/review/review_gpt35_vicuna-13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/review/review_gpt35_vicuna-13b.jsonl -------------------------------------------------------------------------------- /llava/eval/table/review/review_llama-13b_vicuna-13b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/review/review_llama-13b_vicuna-13b.jsonl -------------------------------------------------------------------------------- /llava/eval/table/reviewer.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/reviewer.jsonl -------------------------------------------------------------------------------- /llava/eval/table/rule.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/table/rule.json -------------------------------------------------------------------------------- /llava/eval/webpage/figures/alpaca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/figures/alpaca.png -------------------------------------------------------------------------------- /llava/eval/webpage/figures/bard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/figures/bard.jpg -------------------------------------------------------------------------------- /llava/eval/webpage/figures/chatgpt.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/figures/chatgpt.svg -------------------------------------------------------------------------------- /llava/eval/webpage/figures/llama.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/figures/llama.jpg -------------------------------------------------------------------------------- /llava/eval/webpage/figures/swords_FILL0_wght300_GRAD0_opsz48.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/figures/swords_FILL0_wght300_GRAD0_opsz48.svg -------------------------------------------------------------------------------- /llava/eval/webpage/figures/vicuna.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/figures/vicuna.jpeg -------------------------------------------------------------------------------- /llava/eval/webpage/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/index.html -------------------------------------------------------------------------------- /llava/eval/webpage/script.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/script.js -------------------------------------------------------------------------------- /llava/eval/webpage/styles.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/eval/webpage/styles.css -------------------------------------------------------------------------------- /llava/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/mm_utils.py -------------------------------------------------------------------------------- /llava/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/__init__.py -------------------------------------------------------------------------------- /llava/model/apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/apply_delta.py -------------------------------------------------------------------------------- /llava/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/builder.py -------------------------------------------------------------------------------- /llava/model/consolidate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/consolidate.py -------------------------------------------------------------------------------- /llava/model/language_model/configuration_phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/language_model/configuration_phi3.py -------------------------------------------------------------------------------- /llava/model/language_model/llava_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/language_model/llava_llama.py -------------------------------------------------------------------------------- /llava/model/language_model/llava_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/language_model/llava_mpt.py -------------------------------------------------------------------------------- /llava/model/language_model/llava_phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/language_model/llava_phi3.py -------------------------------------------------------------------------------- /llava/model/language_model/modeling_phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/language_model/modeling_phi3.py -------------------------------------------------------------------------------- /llava/model/llava_arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/llava_arch.py -------------------------------------------------------------------------------- /llava/model/make_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/make_delta.py -------------------------------------------------------------------------------- /llava/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/multimodal_encoder/builder.py -------------------------------------------------------------------------------- /llava/model/multimodal_encoder/clip_4layer_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/multimodal_encoder/clip_4layer_encoder.py -------------------------------------------------------------------------------- /llava/model/multimodal_encoder/clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/multimodal_encoder/clip_encoder.py -------------------------------------------------------------------------------- /llava/model/multimodal_projector/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/multimodal_projector/builder.py -------------------------------------------------------------------------------- /llava/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/model/utils.py -------------------------------------------------------------------------------- /llava/serve/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llava/serve/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/cli.py -------------------------------------------------------------------------------- /llava/serve/cli_vip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/cli_vip.py -------------------------------------------------------------------------------- /llava/serve/controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/controller.py -------------------------------------------------------------------------------- /llava/serve/examples/extreme_ironing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/examples/extreme_ironing.jpg -------------------------------------------------------------------------------- /llava/serve/examples/waterview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/examples/waterview.jpg -------------------------------------------------------------------------------- /llava/serve/gradio_web_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/gradio_web_server.py -------------------------------------------------------------------------------- /llava/serve/model_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/model_worker.py -------------------------------------------------------------------------------- /llava/serve/register_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/register_worker.py -------------------------------------------------------------------------------- /llava/serve/test_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/serve/test_message.py -------------------------------------------------------------------------------- /llava/train/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/train/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /llava/train/llama_xformers_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/train/llama_xformers_attn_monkey_patch.py -------------------------------------------------------------------------------- /llava/train/llava_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/train/llava_trainer.py -------------------------------------------------------------------------------- /llava/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/train/train.py -------------------------------------------------------------------------------- /llava/train/train_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/train/train_mem.py -------------------------------------------------------------------------------- /llava/train/train_xformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/train/train_xformers.py -------------------------------------------------------------------------------- /llava/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/utils.py -------------------------------------------------------------------------------- /llava/visual_prompt_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/visual_prompt_generator.py -------------------------------------------------------------------------------- /llava/visual_prompt_organizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/llava/visual_prompt_organizer.py -------------------------------------------------------------------------------- /playground/data/eval/vip-bench-example-results/vip-llava-7b-human.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/playground/data/eval/vip-bench-example-results/vip-llava-7b-human.json -------------------------------------------------------------------------------- /playground/data/prompts/refcocog.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/playground/data/prompts/refcocog.text -------------------------------------------------------------------------------- /playground/data/prompts/vg.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/playground/data/prompts/vg.text -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/predict.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/convert_vipbench_for_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/convert_vipbench_for_eval.py -------------------------------------------------------------------------------- /scripts/eval/pointQA.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/eval/pointQA.sh -------------------------------------------------------------------------------- /scripts/eval/v7w.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/eval/v7w.sh -------------------------------------------------------------------------------- /scripts/eval/vcr_qa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/eval/vcr_qa.sh -------------------------------------------------------------------------------- /scripts/eval/vcr_qar.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/eval/vcr_qar.sh -------------------------------------------------------------------------------- /scripts/eval/vip-bench_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/eval/vip-bench_evaluator.py -------------------------------------------------------------------------------- /scripts/eval/vipbench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/eval/vipbench.sh -------------------------------------------------------------------------------- /scripts/finetune_llava_1_5_llama3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_llava_1_5_llama3.sh -------------------------------------------------------------------------------- /scripts/finetune_llava_1_5_phi3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_llava_1_5_phi3.sh -------------------------------------------------------------------------------- /scripts/finetune_stage2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_stage2.sh -------------------------------------------------------------------------------- /scripts/finetune_stage2_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_stage2_lora.sh -------------------------------------------------------------------------------- /scripts/finetune_stage3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_stage3.sh -------------------------------------------------------------------------------- /scripts/finetune_task.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_task.sh -------------------------------------------------------------------------------- /scripts/finetune_task_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_task_lora.sh -------------------------------------------------------------------------------- /scripts/finetune_vip_llava_llama3_stage2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_vip_llava_llama3_stage2.sh -------------------------------------------------------------------------------- /scripts/finetune_vip_llava_llama3_stage3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_vip_llava_llama3_stage3.sh -------------------------------------------------------------------------------- /scripts/finetune_vip_llava_phi3_stage2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_vip_llava_phi3_stage2.sh -------------------------------------------------------------------------------- /scripts/finetune_vip_llava_phi3_stage3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/finetune_vip_llava_phi3_stage3.sh -------------------------------------------------------------------------------- /scripts/pretrain.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/pretrain.sh -------------------------------------------------------------------------------- /scripts/pretrain_llava_1_5_llama3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/pretrain_llava_1_5_llama3.sh -------------------------------------------------------------------------------- /scripts/pretrain_llava_1_5_phi3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/pretrain_llava_1_5_phi3.sh -------------------------------------------------------------------------------- /scripts/pretrain_vip_llava_llama3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/pretrain_vip_llava_llama3.sh -------------------------------------------------------------------------------- /scripts/pretrain_vip_llava_phi3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/pretrain_vip_llava_phi3.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/gqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/gqa.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/llavabench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/llavabench.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/mmbench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/mmbench.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/mmbench_cn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/mmbench_cn.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/mme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/mme.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/mmvet.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/mmvet.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/pope.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/pope.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/qbench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/qbench.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/qbench_zh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/qbench_zh.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/seed-img.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/seed-img.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/seed-process-anno.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/seed-process-anno.py -------------------------------------------------------------------------------- /scripts/v1_5/eval/seed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/seed.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/sqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/sqa.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/textvqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/textvqa.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/vizwiz.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/vizwiz.sh -------------------------------------------------------------------------------- /scripts/v1_5/eval/vqav2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/v1_5/eval/vqav2.sh -------------------------------------------------------------------------------- /scripts/zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/zero2.json -------------------------------------------------------------------------------- /scripts/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/zero3.json -------------------------------------------------------------------------------- /scripts/zero3_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WisconsinAIVision/ViP-LLaVA/HEAD/scripts/zero3_offload.json --------------------------------------------------------------------------------