├── README.md
├── cli.py
├── data
│   └── Data.md
├── detector.py
├── eval
│   ├── TQA.py
│   ├── VQA.py
│   ├── caption.py
│   ├── eval_qa.py
│   ├── recognition.py
│   ├── recognition_test.json
│   ├── yollava-text-only-qa.json
│   └── yollava-visual-qa.json
├── example_database
│   ├── bo.png
│   ├── brown-duck.png
│   ├── butin.png
│   ├── cat-cup.png
│   ├── chua-thien-mu.png
│   ├── ciin.png
│   ├── database.json
│   ├── denisdang.png
│   ├── dragon.png
│   ├── duck-banana.png
│   └── dug.png
├── images
│   ├── framework.png
│   └── teaser.png
├── llava
│   ├── __init__.py
│   ├── constants.py
│   ├── conversation.py
│   ├── mm_utils.py
│   ├── model
│   │   ├── __init__.py
│   │   ├── apply_delta.py
│   │   ├── builder.py
│   │   ├── consolidate.py
│   │   ├── language_model
│   │   │   ├── llava_llama.py
│   │   │   ├── llava_mistral.py
│   │   │   ├── llava_mpt.py
│   │   │   └── llava_phi3.py
│   │   ├── llava_arch.py
│   │   ├── make_delta.py
│   │   ├── multimodal_encoder
│   │   │   ├── builder.py
│   │   │   └── clip_encoder.py
│   │   ├── multimodal_projector
│   │   │   └── builder.py
│   │   └── utils.py
│   ├── train
│   │   ├── llava_trainer.py
│   │   ├── rap_train.py
│   │   └── train.py
│   └── utils.py
├── pyproject.toml
├── requirements.txt
├── retriever.py
└── scripts
    ├── train_lora_llava.sh
    ├── train_rap_llava.sh
    ├── train_rap_phi3.sh
    ├── zero2.json
    ├── zero3.json
    └── zero3_offload.json
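The file contents are not inlined in this dump; each entry below pairs a file path with the raw GitHub URL that serves its contents. For completeness, here is a minimal sketch (not part of the repository) of how any listed file could be fetched programmatically; the fetch_file helper and the third-party requests dependency are assumptions, not code from the repo.

```python
# Minimal sketch (not part of the repository): fetch any listed file from its raw URL.
# Assumes the third-party `requests` package is installed.
import requests

RAW_BASE = "https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/"

def fetch_file(relative_path: str) -> bytes:
    """Download the raw contents of a repository file, e.g. "retriever.py"."""
    response = requests.get(RAW_BASE + relative_path, timeout=30)
    response.raise_for_status()  # surface 404s and network errors
    return response.content      # bytes, so this also works for the .png assets

if __name__ == "__main__":
    # Peek at the pinned dependencies without cloning the repository.
    print(fetch_file("requirements.txt").decode("utf-8"))
```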
/README.md: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/README.md
/cli.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/cli.py
/data/Data.md: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/data/Data.md
/detector.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/detector.py
/eval/TQA.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/eval/TQA.py
/eval/VQA.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/eval/VQA.py
/eval/caption.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/eval/caption.py
/eval/eval_qa.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/eval/eval_qa.py
/eval/recognition.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/eval/recognition.py
/eval/recognition_test.json: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/eval/recognition_test.json
/eval/yollava-text-only-qa.json: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/eval/yollava-text-only-qa.json
/eval/yollava-visual-qa.json: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/eval/yollava-visual-qa.json
/example_database/bo.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/bo.png
/example_database/brown-duck.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/brown-duck.png
/example_database/butin.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/butin.png
/example_database/cat-cup.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/cat-cup.png
/example_database/chua-thien-mu.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/chua-thien-mu.png
/example_database/ciin.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/ciin.png
/example_database/database.json: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/database.json
/example_database/denisdang.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/denisdang.png
/example_database/dragon.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/dragon.png
/example_database/duck-banana.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/duck-banana.png
/example_database/dug.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/example_database/dug.png
/images/framework.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/images/framework.png
/images/teaser.png: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/images/teaser.png
/llava/__init__.py: from .model import LlavaLlamaForCausalLM   (the file's only line, shown inline; see the import sketch after this list)
/llava/constants.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/constants.py
/llava/conversation.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/conversation.py
/llava/mm_utils.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/mm_utils.py
/llava/model/__init__.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/__init__.py
/llava/model/apply_delta.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/apply_delta.py
/llava/model/builder.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/builder.py
/llava/model/consolidate.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/consolidate.py
/llava/model/language_model/llava_llama.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/language_model/llava_llama.py
/llava/model/language_model/llava_mistral.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/language_model/llava_mistral.py
/llava/model/language_model/llava_mpt.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/language_model/llava_mpt.py
/llava/model/language_model/llava_phi3.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/language_model/llava_phi3.py
/llava/model/llava_arch.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/llava_arch.py
/llava/model/make_delta.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/make_delta.py
/llava/model/multimodal_encoder/builder.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/multimodal_encoder/builder.py
/llava/model/multimodal_encoder/clip_encoder.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/multimodal_encoder/clip_encoder.py
/llava/model/multimodal_projector/builder.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/multimodal_projector/builder.py
/llava/model/utils.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/model/utils.py
/llava/train/llava_trainer.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/train/llava_trainer.py
/llava/train/rap_train.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/train/rap_train.py
/llava/train/train.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/train/train.py
/llava/utils.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/llava/utils.py
/pyproject.toml: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/pyproject.toml
/requirements.txt: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/requirements.txt
/retriever.py: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/retriever.py
/scripts/train_lora_llava.sh: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/scripts/train_lora_llava.sh
/scripts/train_rap_llava.sh: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/scripts/train_rap_llava.sh
/scripts/train_rap_phi3.sh: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/scripts/train_rap_phi3.sh
/scripts/zero2.json: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/scripts/zero2.json
/scripts/zero3.json: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/scripts/zero3.json
/scripts/zero3_offload.json: https://raw.githubusercontent.com/Hoar012/RAP-MLLM/HEAD/scripts/zero3_offload.json
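The only file whose contents appear inline above is /llava/__init__.py, which re-exports LlavaLlamaForCausalLM from the llava.model subpackage so it can be imported at package level. The sketch below is illustrative only: it assumes the package has been installed from this repository (e.g. pip install -e . against the included pyproject.toml) and that the class follows the usual Hugging Face from_pretrained loading convention; the checkpoint path is a placeholder.

```python
# Usage sketch based on the re-export in llava/__init__.py; not taken from the repository's docs.
from llava import LlavaLlamaForCausalLM  # same object as llava.model.LlavaLlamaForCausalLM

# Assumption: the class supports Hugging Face-style checkpoint loading; the path is a placeholder.
model = LlavaLlamaForCausalLM.from_pretrained("/path/to/rap-llava-checkpoint")
model.eval()
```

For the repository's own model-construction path, llava/model/builder.py listed above is likely the place to look.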