├── LICENSE ├── README.md ├── Statement of Clarification.md ├── images ├── architecture.png └── readme.md ├── lego ├── LEGO.py ├── __init__.py ├── constants.py ├── conversation.py ├── mm_utils.py ├── model │ ├── builder.py │ └── utils.py ├── serve │ ├── __init__.py │ ├── cli.py │ ├── gradio_utils.py │ └── gradio_web_server.py ├── train │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── llama_flash_attn_monkey_patch.cpython-39.pyc │ │ ├── llava_trainer.cpython-39.pyc │ │ └── train.cpython-39.pyc │ ├── llama_flash_attn_monkey_patch.py │ ├── train.py │ └── train_mem.py └── utils.py ├── requirements.txt ├── scripts ├── finetune.sh ├── pretrain.sh ├── zero2.json └── zero3.json └── video_llama ├── __init__.py ├── __pycache__ └── __init__.cpython-39.pyc ├── common ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── dist_utils.cpython-39.pyc │ ├── logger.cpython-39.pyc │ ├── registry.cpython-39.pyc │ └── utils.cpython-39.pyc ├── config.py ├── dist_utils.py ├── gradcam.py ├── logger.py ├── optims.py ├── registry.py └── utils.py ├── configs ├── datasets │ ├── cc_sbu │ │ ├── align.yaml │ │ └── defaults.yaml │ ├── instruct │ │ ├── llava_instruct.yaml │ │ └── webvid_instruct.yaml │ ├── laion │ │ └── defaults.yaml │ └── webvid │ │ └── defaults.yaml ├── default.yaml └── models │ ├── minigpt4.yaml │ └── video_llama.yaml ├── conversation ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ └── conversation_video.cpython-39.pyc └── conversation_video.py ├── datasets ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ └── data_utils.cpython-39.pyc ├── builders │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── base_dataset_builder.cpython-39.pyc │ │ ├── image_text_pair_builder.cpython-39.pyc │ │ ├── instruct_builder.cpython-39.pyc │ │ └── video_caption_builder.cpython-39.pyc │ ├── base_dataset_builder.py │ ├── image_text_pair_builder.py │ ├── instruct_builder.py │ └── video_caption_builder.py ├── data_utils.py └── datasets │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── base_dataset.cpython-39.pyc │ ├── caption_datasets.cpython-39.pyc │ ├── cc_sbu_dataset.cpython-39.pyc │ ├── laion_dataset.cpython-39.pyc │ ├── llava_instruct_dataset.cpython-39.pyc │ ├── video_instruct_dataset.cpython-39.pyc │ └── webvid_datasets.cpython-39.pyc │ ├── base_dataset.py │ ├── caption_datasets.py │ ├── cc_sbu_dataset.py │ ├── dataloader_utils.py │ ├── laion_dataset.py │ ├── llava_instruct_dataset.py │ ├── video_instruct_dataset.py │ └── webvid_datasets.py ├── models ├── ImageBind │ ├── .assets │ │ ├── bird_audio.wav │ │ ├── bird_image.jpg │ │ ├── car_audio.wav │ │ ├── car_image.jpg │ │ ├── dog_audio.wav │ │ └── dog_image.jpg │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── __pycache__ │ │ └── data.cpython-39.pyc │ ├── bpe │ │ └── bpe_simple_vocab_16e6.txt.gz │ ├── data.py │ ├── model_card.md │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── helpers.cpython-39.pyc │ │ │ ├── imagebind_model.cpython-39.pyc │ │ │ ├── multimodal_preprocessors.cpython-39.pyc │ │ │ └── transformer.cpython-39.pyc │ │ ├── helpers.py │ │ ├── imagebind_model.py │ │ ├── multimodal_preprocessors.py │ │ └── transformer.py │ └── requirements.txt ├── Qformer.py ├── __init__.py ├── __pycache__ │ ├── Qformer.cpython-39.pyc │ ├── __init__.cpython-39.pyc │ ├── base_model.cpython-39.pyc │ ├── blip2.cpython-39.pyc │ ├── eva_vit.cpython-39.pyc │ ├── modeling_llama.cpython-39.pyc │ └── video_llama.cpython-39.pyc ├── base_model.py ├── blip2.py ├── blip2_outputs.py ├── eva_vit.py ├── modeling_llama.py └── video_llama.py ├── processors ├── .ipynb_checkpoints │ └── video_processor-checkpoint.py ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── base_processor.cpython-39.pyc │ ├── blip_processors.cpython-39.pyc │ ├── functional_video.cpython-39.pyc │ ├── randaugment.cpython-39.pyc │ ├── transforms_video.cpython-39.pyc │ └── video_processor.cpython-39.pyc ├── base_processor.py ├── blip_processors.py ├── functional_video.py ├── randaugment.py ├── transforms_video.py └── video_processor.py ├── runners ├── __init__.py ├── runner_base.py └── test.py └── tasks ├── __init__.py ├── __pycache__ ├── __init__.cpython-39.pyc ├── base_task.cpython-39.pyc ├── image_text_pretrain.cpython-39.pyc └── video_text_pretrain.cpython-39.pyc ├── base_task.py ├── image_text_pretrain.py └── video_text_pretrain.py /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/README.md -------------------------------------------------------------------------------- /Statement of Clarification.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/Statement of Clarification.md -------------------------------------------------------------------------------- /images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/images/architecture.png -------------------------------------------------------------------------------- /images/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lego/LEGO.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/LEGO.py -------------------------------------------------------------------------------- /lego/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/__init__.py -------------------------------------------------------------------------------- /lego/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/constants.py -------------------------------------------------------------------------------- /lego/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/conversation.py -------------------------------------------------------------------------------- /lego/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/mm_utils.py -------------------------------------------------------------------------------- /lego/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/model/builder.py -------------------------------------------------------------------------------- /lego/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/model/utils.py -------------------------------------------------------------------------------- /lego/serve/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lego/serve/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/serve/cli.py -------------------------------------------------------------------------------- /lego/serve/gradio_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/serve/gradio_utils.py -------------------------------------------------------------------------------- /lego/serve/gradio_web_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/serve/gradio_web_server.py -------------------------------------------------------------------------------- /lego/train/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/train/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /lego/train/__pycache__/llama_flash_attn_monkey_patch.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/train/__pycache__/llama_flash_attn_monkey_patch.cpython-39.pyc -------------------------------------------------------------------------------- /lego/train/__pycache__/llava_trainer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/train/__pycache__/llava_trainer.cpython-39.pyc -------------------------------------------------------------------------------- /lego/train/__pycache__/train.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/train/__pycache__/train.cpython-39.pyc -------------------------------------------------------------------------------- /lego/train/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/train/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /lego/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/train/train.py -------------------------------------------------------------------------------- /lego/train/train_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/train/train_mem.py -------------------------------------------------------------------------------- /lego/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/lego/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/scripts/finetune.sh -------------------------------------------------------------------------------- /scripts/pretrain.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/scripts/pretrain.sh -------------------------------------------------------------------------------- /scripts/zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/scripts/zero2.json -------------------------------------------------------------------------------- /scripts/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/scripts/zero3.json -------------------------------------------------------------------------------- /video_llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/__init__.py -------------------------------------------------------------------------------- /video_llama/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /video_llama/common/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/common/__pycache__/dist_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/__pycache__/dist_utils.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/common/__pycache__/logger.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/__pycache__/logger.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/common/__pycache__/registry.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/__pycache__/registry.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/common/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/common/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/config.py -------------------------------------------------------------------------------- /video_llama/common/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/dist_utils.py -------------------------------------------------------------------------------- /video_llama/common/gradcam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/gradcam.py -------------------------------------------------------------------------------- /video_llama/common/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/logger.py -------------------------------------------------------------------------------- /video_llama/common/optims.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/optims.py -------------------------------------------------------------------------------- /video_llama/common/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/registry.py -------------------------------------------------------------------------------- /video_llama/common/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/common/utils.py -------------------------------------------------------------------------------- /video_llama/configs/datasets/cc_sbu/align.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/datasets/cc_sbu/align.yaml -------------------------------------------------------------------------------- /video_llama/configs/datasets/cc_sbu/defaults.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/datasets/cc_sbu/defaults.yaml -------------------------------------------------------------------------------- /video_llama/configs/datasets/instruct/llava_instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/datasets/instruct/llava_instruct.yaml -------------------------------------------------------------------------------- /video_llama/configs/datasets/instruct/webvid_instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/datasets/instruct/webvid_instruct.yaml -------------------------------------------------------------------------------- /video_llama/configs/datasets/laion/defaults.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/datasets/laion/defaults.yaml -------------------------------------------------------------------------------- /video_llama/configs/datasets/webvid/defaults.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/datasets/webvid/defaults.yaml -------------------------------------------------------------------------------- /video_llama/configs/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/default.yaml -------------------------------------------------------------------------------- /video_llama/configs/models/minigpt4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/models/minigpt4.yaml -------------------------------------------------------------------------------- /video_llama/configs/models/video_llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/configs/models/video_llama.yaml -------------------------------------------------------------------------------- /video_llama/conversation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /video_llama/conversation/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/conversation/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/conversation/__pycache__/conversation_video.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/conversation/__pycache__/conversation_video.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/conversation/conversation_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/conversation/conversation_video.py -------------------------------------------------------------------------------- /video_llama/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /video_llama/datasets/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/__pycache__/data_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/__pycache__/data_utils.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/builders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/__init__.py -------------------------------------------------------------------------------- /video_llama/datasets/builders/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/builders/__pycache__/base_dataset_builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/__pycache__/base_dataset_builder.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/builders/__pycache__/image_text_pair_builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/__pycache__/image_text_pair_builder.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/builders/__pycache__/instruct_builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/__pycache__/instruct_builder.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/builders/__pycache__/video_caption_builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/__pycache__/video_caption_builder.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/builders/base_dataset_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/base_dataset_builder.py -------------------------------------------------------------------------------- /video_llama/datasets/builders/image_text_pair_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/image_text_pair_builder.py -------------------------------------------------------------------------------- /video_llama/datasets/builders/instruct_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/instruct_builder.py -------------------------------------------------------------------------------- /video_llama/datasets/builders/video_caption_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/builders/video_caption_builder.py -------------------------------------------------------------------------------- /video_llama/datasets/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/data_utils.py -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__pycache__/base_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/__pycache__/base_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__pycache__/caption_datasets.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/__pycache__/caption_datasets.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__pycache__/cc_sbu_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/__pycache__/cc_sbu_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__pycache__/laion_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/__pycache__/laion_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__pycache__/llava_instruct_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/__pycache__/llava_instruct_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__pycache__/video_instruct_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/__pycache__/video_instruct_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/datasets/__pycache__/webvid_datasets.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/__pycache__/webvid_datasets.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/datasets/datasets/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/base_dataset.py -------------------------------------------------------------------------------- /video_llama/datasets/datasets/caption_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/caption_datasets.py -------------------------------------------------------------------------------- /video_llama/datasets/datasets/cc_sbu_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/cc_sbu_dataset.py -------------------------------------------------------------------------------- /video_llama/datasets/datasets/dataloader_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/dataloader_utils.py -------------------------------------------------------------------------------- /video_llama/datasets/datasets/laion_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/laion_dataset.py -------------------------------------------------------------------------------- /video_llama/datasets/datasets/llava_instruct_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/llava_instruct_dataset.py -------------------------------------------------------------------------------- /video_llama/datasets/datasets/video_instruct_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/video_instruct_dataset.py -------------------------------------------------------------------------------- /video_llama/datasets/datasets/webvid_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/datasets/datasets/webvid_datasets.py -------------------------------------------------------------------------------- /video_llama/models/ImageBind/.assets/bird_audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/.assets/bird_audio.wav -------------------------------------------------------------------------------- /video_llama/models/ImageBind/.assets/bird_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/.assets/bird_image.jpg -------------------------------------------------------------------------------- /video_llama/models/ImageBind/.assets/car_audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/.assets/car_audio.wav -------------------------------------------------------------------------------- /video_llama/models/ImageBind/.assets/car_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/.assets/car_image.jpg -------------------------------------------------------------------------------- /video_llama/models/ImageBind/.assets/dog_audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/.assets/dog_audio.wav -------------------------------------------------------------------------------- /video_llama/models/ImageBind/.assets/dog_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/.assets/dog_image.jpg -------------------------------------------------------------------------------- /video_llama/models/ImageBind/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /video_llama/models/ImageBind/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/CONTRIBUTING.md -------------------------------------------------------------------------------- /video_llama/models/ImageBind/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/LICENSE -------------------------------------------------------------------------------- /video_llama/models/ImageBind/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/README.md -------------------------------------------------------------------------------- /video_llama/models/ImageBind/__pycache__/data.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/__pycache__/data.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /video_llama/models/ImageBind/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/data.py -------------------------------------------------------------------------------- /video_llama/models/ImageBind/model_card.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/model_card.md -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/__pycache__/helpers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/__pycache__/helpers.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/__pycache__/imagebind_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/__pycache__/imagebind_model.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/__pycache__/multimodal_preprocessors.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/__pycache__/multimodal_preprocessors.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/__pycache__/transformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/__pycache__/transformer.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/helpers.py -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/imagebind_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/imagebind_model.py -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/multimodal_preprocessors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/multimodal_preprocessors.py -------------------------------------------------------------------------------- /video_llama/models/ImageBind/models/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/models/transformer.py -------------------------------------------------------------------------------- /video_llama/models/ImageBind/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/ImageBind/requirements.txt -------------------------------------------------------------------------------- /video_llama/models/Qformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/Qformer.py -------------------------------------------------------------------------------- /video_llama/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/__init__.py -------------------------------------------------------------------------------- /video_llama/models/__pycache__/Qformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/__pycache__/Qformer.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/__pycache__/base_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/__pycache__/base_model.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/__pycache__/blip2.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/__pycache__/blip2.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/__pycache__/eva_vit.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/__pycache__/eva_vit.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/__pycache__/modeling_llama.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/__pycache__/modeling_llama.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/__pycache__/video_llama.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/__pycache__/video_llama.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/models/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/base_model.py -------------------------------------------------------------------------------- /video_llama/models/blip2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/blip2.py -------------------------------------------------------------------------------- /video_llama/models/blip2_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/blip2_outputs.py -------------------------------------------------------------------------------- /video_llama/models/eva_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/eva_vit.py -------------------------------------------------------------------------------- /video_llama/models/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/modeling_llama.py -------------------------------------------------------------------------------- /video_llama/models/video_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/models/video_llama.py -------------------------------------------------------------------------------- /video_llama/processors/.ipynb_checkpoints/video_processor-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/.ipynb_checkpoints/video_processor-checkpoint.py -------------------------------------------------------------------------------- /video_llama/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/__init__.py -------------------------------------------------------------------------------- /video_llama/processors/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/processors/__pycache__/base_processor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/__pycache__/base_processor.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/processors/__pycache__/blip_processors.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/__pycache__/blip_processors.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/processors/__pycache__/functional_video.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/__pycache__/functional_video.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/processors/__pycache__/randaugment.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/__pycache__/randaugment.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/processors/__pycache__/transforms_video.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/__pycache__/transforms_video.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/processors/__pycache__/video_processor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/__pycache__/video_processor.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/processors/base_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/base_processor.py -------------------------------------------------------------------------------- /video_llama/processors/blip_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/blip_processors.py -------------------------------------------------------------------------------- /video_llama/processors/functional_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/functional_video.py -------------------------------------------------------------------------------- /video_llama/processors/randaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/randaugment.py -------------------------------------------------------------------------------- /video_llama/processors/transforms_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/transforms_video.py -------------------------------------------------------------------------------- /video_llama/processors/video_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/processors/video_processor.py -------------------------------------------------------------------------------- /video_llama/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/runners/__init__.py -------------------------------------------------------------------------------- /video_llama/runners/runner_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/runners/runner_base.py -------------------------------------------------------------------------------- /video_llama/runners/test.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /video_llama/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/tasks/__init__.py -------------------------------------------------------------------------------- /video_llama/tasks/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/tasks/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/tasks/__pycache__/base_task.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/tasks/__pycache__/base_task.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/tasks/__pycache__/image_text_pretrain.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/tasks/__pycache__/image_text_pretrain.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/tasks/__pycache__/video_text_pretrain.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/tasks/__pycache__/video_text_pretrain.cpython-39.pyc -------------------------------------------------------------------------------- /video_llama/tasks/base_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/tasks/base_task.py -------------------------------------------------------------------------------- /video_llama/tasks/image_text_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/tasks/image_text_pretrain.py -------------------------------------------------------------------------------- /video_llama/tasks/video_text_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lzw-lzw/GroundingGPT/HEAD/video_llama/tasks/video_text_pretrain.py --------------------------------------------------------------------------------