├── .gitignore ├── LICENSE ├── README.md ├── assets ├── READMEv1.md ├── mobilevlm_arch.png ├── mobilevlm_v2_arch.png └── samples │ └── demo.jpg ├── mobilellama └── sft │ ├── MobileLLaMA_SFT.md │ ├── sft_MobileLLaMA-1.4B-Base.sh │ └── sft_MobileLLaMA-2.7B-Base.sh ├── mobilevlm ├── constants.py ├── conversation.py ├── eval │ ├── m4c_evaluator.py │ ├── model_vqa_loader.py │ ├── model_vqa_mmbench.py │ └── model_vqa_science.py ├── model │ ├── mobilellama.py │ ├── mobilevlm.py │ ├── vision_encoder.py │ └── vision_projector.py ├── train │ ├── llama_flash_attn.py │ ├── train.py │ ├── train_mem.py │ └── trainer.py └── utils.py ├── requirements.txt ├── run.sh ├── run_v1.sh └── scripts ├── benchmark.sh ├── benchmark ├── gqa.sh ├── mmbench.sh ├── mme.sh ├── pope.sh ├── sqa.sh └── textvqa.sh ├── deepspeed ├── zero2.json └── zero3.json ├── inference.py └── mergelora.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/README.md -------------------------------------------------------------------------------- /assets/READMEv1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/assets/READMEv1.md -------------------------------------------------------------------------------- /assets/mobilevlm_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/assets/mobilevlm_arch.png -------------------------------------------------------------------------------- /assets/mobilevlm_v2_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/assets/mobilevlm_v2_arch.png -------------------------------------------------------------------------------- /assets/samples/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/assets/samples/demo.jpg -------------------------------------------------------------------------------- /mobilellama/sft/MobileLLaMA_SFT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilellama/sft/MobileLLaMA_SFT.md -------------------------------------------------------------------------------- /mobilellama/sft/sft_MobileLLaMA-1.4B-Base.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilellama/sft/sft_MobileLLaMA-1.4B-Base.sh -------------------------------------------------------------------------------- /mobilellama/sft/sft_MobileLLaMA-2.7B-Base.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilellama/sft/sft_MobileLLaMA-2.7B-Base.sh -------------------------------------------------------------------------------- /mobilevlm/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/constants.py -------------------------------------------------------------------------------- /mobilevlm/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/conversation.py -------------------------------------------------------------------------------- /mobilevlm/eval/m4c_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/eval/m4c_evaluator.py -------------------------------------------------------------------------------- /mobilevlm/eval/model_vqa_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/eval/model_vqa_loader.py -------------------------------------------------------------------------------- /mobilevlm/eval/model_vqa_mmbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/eval/model_vqa_mmbench.py -------------------------------------------------------------------------------- /mobilevlm/eval/model_vqa_science.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/eval/model_vqa_science.py -------------------------------------------------------------------------------- /mobilevlm/model/mobilellama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/model/mobilellama.py -------------------------------------------------------------------------------- /mobilevlm/model/mobilevlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/model/mobilevlm.py -------------------------------------------------------------------------------- /mobilevlm/model/vision_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/model/vision_encoder.py -------------------------------------------------------------------------------- /mobilevlm/model/vision_projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/model/vision_projector.py -------------------------------------------------------------------------------- /mobilevlm/train/llama_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/train/llama_flash_attn.py -------------------------------------------------------------------------------- /mobilevlm/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/train/train.py -------------------------------------------------------------------------------- /mobilevlm/train/train_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/train/train_mem.py -------------------------------------------------------------------------------- /mobilevlm/train/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/train/trainer.py -------------------------------------------------------------------------------- /mobilevlm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/mobilevlm/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/requirements.txt -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/run.sh -------------------------------------------------------------------------------- /run_v1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/run_v1.sh -------------------------------------------------------------------------------- /scripts/benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/benchmark.sh -------------------------------------------------------------------------------- /scripts/benchmark/gqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/benchmark/gqa.sh -------------------------------------------------------------------------------- /scripts/benchmark/mmbench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/benchmark/mmbench.sh -------------------------------------------------------------------------------- /scripts/benchmark/mme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/benchmark/mme.sh -------------------------------------------------------------------------------- /scripts/benchmark/pope.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/benchmark/pope.sh -------------------------------------------------------------------------------- /scripts/benchmark/sqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/benchmark/sqa.sh -------------------------------------------------------------------------------- /scripts/benchmark/textvqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/benchmark/textvqa.sh -------------------------------------------------------------------------------- /scripts/deepspeed/zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/deepspeed/zero2.json -------------------------------------------------------------------------------- /scripts/deepspeed/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/deepspeed/zero3.json -------------------------------------------------------------------------------- /scripts/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/inference.py -------------------------------------------------------------------------------- /scripts/mergelora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meituan-AutoML/MobileVLM/HEAD/scripts/mergelora.py --------------------------------------------------------------------------------