├── LICENSE ├── LICENSE_Lavis.md ├── LICENSE_Minigpt4.md ├── README.md ├── apply_delta.py ├── data_filtering ├── compute_clip_frame_scores.py └── filtered_videos.pkl ├── demo_video.py ├── environment.yml ├── eval_configs ├── conversation_demo.yaml └── koala_eval_only_vl.yaml ├── eval_qa_egoschema.py ├── figs └── architecture_v2.png ├── koala ├── __init__.py ├── __pycache__ │ └── __init__.cpython-39.pyc ├── common │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── config.cpython-39.pyc │ │ ├── dist_utils.cpython-39.pyc │ │ ├── logger.cpython-39.pyc │ │ ├── optims.cpython-39.pyc │ │ ├── registry.cpython-39.pyc │ │ └── utils.cpython-39.pyc │ ├── config.py │ ├── dist_utils.py │ ├── gradcam.py │ ├── logger.py │ ├── optims.py │ ├── registry.py │ └── utils.py ├── configs │ ├── datasets │ │ ├── cc_sbu │ │ │ ├── align.yaml │ │ │ └── defaults.yaml │ │ ├── instruct │ │ │ ├── llava_instruct.yaml │ │ │ └── webvid_instruct.yaml │ │ ├── laion │ │ │ └── defaults.yaml │ │ └── webvid │ │ │ └── defaults.yaml │ ├── default.yaml │ └── models │ │ ├── minigpt4.yaml │ │ └── video_llama.yaml ├── conversation │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── conversation_video.cpython-39.pyc │ └── conversation_video.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── data_utils.cpython-39.pyc │ ├── builders │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── base_dataset_builder.cpython-39.pyc │ │ │ ├── image_text_pair_builder.cpython-39.pyc │ │ │ ├── instruct_builder.cpython-39.pyc │ │ │ └── video_caption_builder.cpython-39.pyc │ │ ├── base_dataset_builder.py │ │ ├── image_text_pair_builder.py │ │ ├── instruct_builder.py │ │ └── video_caption_builder.py │ ├── data_utils.py │ └── datasets │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── base_dataset.cpython-39.pyc │ │ ├── caption_datasets.cpython-39.pyc │ │ ├── cc_sbu_dataset.cpython-39.pyc │ │ ├── dataloader_utils.cpython-39.pyc │ │ ├── laion_dataset.cpython-39.pyc │ │ ├── llava_instruct_dataset.cpython-39.pyc │ │ ├── video_instruct_dataset.cpython-39.pyc │ │ └── webvid_datasets.cpython-39.pyc │ │ ├── base_dataset.py │ │ ├── caption_datasets.py │ │ ├── cc_sbu_dataset.py │ │ ├── dataloader_utils.py │ │ ├── laion_dataset.py │ │ ├── llava_instruct_dataset.py │ │ ├── video_instruct_dataset.py │ │ └── webvid_datasets.py ├── models │ ├── ImageBind │ │ ├── .assets │ │ │ ├── bird_audio.wav │ │ │ ├── bird_image.jpg │ │ │ ├── car_audio.wav │ │ │ ├── car_image.jpg │ │ │ ├── dog_audio.wav │ │ │ └── dog_image.jpg │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __pycache__ │ │ │ └── data.cpython-39.pyc │ │ ├── bpe │ │ │ └── bpe_simple_vocab_16e6.txt.gz │ │ ├── data.py │ │ ├── model_card.md │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── helpers.cpython-39.pyc │ │ │ │ ├── imagebind_model.cpython-39.pyc │ │ │ │ ├── multimodal_preprocessors.cpython-39.pyc │ │ │ │ └── transformer.cpython-39.pyc │ │ │ ├── helpers.py │ │ │ ├── imagebind_model.py │ │ │ ├── multimodal_preprocessors.py │ │ │ └── transformer.py │ │ └── requirements.txt │ ├── Qformer.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── Qformer.cpython-39.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── base_model.cpython-39.pyc │ │ ├── blip2.cpython-39.pyc │ │ ├── eva_vit.cpython-39.pyc │ │ ├── modeling_llama.cpython-39.pyc │ │ ├── moviechat.cpython-39.pyc │ │ ├── video_agg_model.cpython-39.pyc │ │ ├── video_goal_inference_model.cpython-39.pyc │ │ ├── video_instruction_ft_model.cpython-39.pyc │ │ ├── video_llama.cpython-39.pyc │ │ └── video_llama_captioning.cpython-39.pyc │ ├── base_model.py │ ├── blip2.py │ ├── blip2_outputs.py │ ├── eva_vit.py │ ├── modeling_llama.py │ └── video_agg_model.py ├── processors │ ├── .ipynb_checkpoints │ │ └── video_processor-checkpoint.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── base_processor.cpython-39.pyc │ │ ├── blip_processors.cpython-39.pyc │ │ ├── functional_video.cpython-39.pyc │ │ ├── randaugment.cpython-39.pyc │ │ ├── transforms_video.cpython-39.pyc │ │ └── video_processor.cpython-39.pyc │ ├── base_processor.py │ ├── blip_processors.py │ ├── functional_video.py │ ├── randaugment.py │ ├── transforms_video.py │ └── video_processor.py ├── runners │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── runner_base.cpython-39.pyc │ ├── runner_base.py │ └── test.py └── tasks │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── base_task.cpython-39.pyc │ ├── image_text_pretrain.cpython-39.pyc │ └── video_text_pretrain.cpython-39.pyc │ ├── base_task.py │ ├── image_text_pretrain.py │ └── video_text_pretrain.py ├── preprocessing_scripts └── extract_video_frames.py ├── prompts └── alignment_image.txt ├── requirement.txt ├── setup.py ├── train_configs ├── conversation_demo.yaml └── video_aggregation_finetune.yaml ├── train_video_agg_model.py └── video_agg_dataloader.py /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/LICENSE -------------------------------------------------------------------------------- /LICENSE_Lavis.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/LICENSE_Lavis.md -------------------------------------------------------------------------------- /LICENSE_Minigpt4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/LICENSE_Minigpt4.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/README.md -------------------------------------------------------------------------------- /apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/apply_delta.py -------------------------------------------------------------------------------- /data_filtering/compute_clip_frame_scores.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/data_filtering/compute_clip_frame_scores.py -------------------------------------------------------------------------------- /data_filtering/filtered_videos.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/data_filtering/filtered_videos.pkl -------------------------------------------------------------------------------- /demo_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/demo_video.py -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/environment.yml -------------------------------------------------------------------------------- /eval_configs/conversation_demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/eval_configs/conversation_demo.yaml -------------------------------------------------------------------------------- /eval_configs/koala_eval_only_vl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/eval_configs/koala_eval_only_vl.yaml -------------------------------------------------------------------------------- /eval_qa_egoschema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/eval_qa_egoschema.py -------------------------------------------------------------------------------- /figs/architecture_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/figs/architecture_v2.png -------------------------------------------------------------------------------- /koala/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/__init__.py -------------------------------------------------------------------------------- /koala/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /koala/common/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/common/__pycache__/config.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/__pycache__/config.cpython-39.pyc -------------------------------------------------------------------------------- /koala/common/__pycache__/dist_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/__pycache__/dist_utils.cpython-39.pyc -------------------------------------------------------------------------------- /koala/common/__pycache__/logger.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/__pycache__/logger.cpython-39.pyc -------------------------------------------------------------------------------- /koala/common/__pycache__/optims.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/__pycache__/optims.cpython-39.pyc -------------------------------------------------------------------------------- /koala/common/__pycache__/registry.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/__pycache__/registry.cpython-39.pyc -------------------------------------------------------------------------------- /koala/common/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /koala/common/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/config.py -------------------------------------------------------------------------------- /koala/common/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/dist_utils.py -------------------------------------------------------------------------------- /koala/common/gradcam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/gradcam.py -------------------------------------------------------------------------------- /koala/common/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/logger.py -------------------------------------------------------------------------------- /koala/common/optims.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/optims.py -------------------------------------------------------------------------------- /koala/common/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/registry.py -------------------------------------------------------------------------------- /koala/common/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/common/utils.py -------------------------------------------------------------------------------- /koala/configs/datasets/cc_sbu/align.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/datasets/cc_sbu/align.yaml -------------------------------------------------------------------------------- /koala/configs/datasets/cc_sbu/defaults.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/datasets/cc_sbu/defaults.yaml -------------------------------------------------------------------------------- /koala/configs/datasets/instruct/llava_instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/datasets/instruct/llava_instruct.yaml -------------------------------------------------------------------------------- /koala/configs/datasets/instruct/webvid_instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/datasets/instruct/webvid_instruct.yaml -------------------------------------------------------------------------------- /koala/configs/datasets/laion/defaults.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/datasets/laion/defaults.yaml -------------------------------------------------------------------------------- /koala/configs/datasets/webvid/defaults.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/datasets/webvid/defaults.yaml -------------------------------------------------------------------------------- /koala/configs/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/default.yaml -------------------------------------------------------------------------------- /koala/configs/models/minigpt4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/models/minigpt4.yaml -------------------------------------------------------------------------------- /koala/configs/models/video_llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/configs/models/video_llama.yaml -------------------------------------------------------------------------------- /koala/conversation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /koala/conversation/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/conversation/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/conversation/__pycache__/conversation_video.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/conversation/__pycache__/conversation_video.cpython-39.pyc -------------------------------------------------------------------------------- /koala/conversation/conversation_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/conversation/conversation_video.py -------------------------------------------------------------------------------- /koala/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /koala/datasets/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/__pycache__/data_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/__pycache__/data_utils.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/builders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/__init__.py -------------------------------------------------------------------------------- /koala/datasets/builders/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/builders/__pycache__/base_dataset_builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/__pycache__/base_dataset_builder.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/builders/__pycache__/image_text_pair_builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/__pycache__/image_text_pair_builder.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/builders/__pycache__/instruct_builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/__pycache__/instruct_builder.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/builders/__pycache__/video_caption_builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/__pycache__/video_caption_builder.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/builders/base_dataset_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/base_dataset_builder.py -------------------------------------------------------------------------------- /koala/datasets/builders/image_text_pair_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/image_text_pair_builder.py -------------------------------------------------------------------------------- /koala/datasets/builders/instruct_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/instruct_builder.py -------------------------------------------------------------------------------- /koala/datasets/builders/video_caption_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/builders/video_caption_builder.py -------------------------------------------------------------------------------- /koala/datasets/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/data_utils.py -------------------------------------------------------------------------------- /koala/datasets/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/base_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/base_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/caption_datasets.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/caption_datasets.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/cc_sbu_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/cc_sbu_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/dataloader_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/dataloader_utils.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/laion_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/laion_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/llava_instruct_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/llava_instruct_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/video_instruct_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/video_instruct_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/__pycache__/webvid_datasets.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/__pycache__/webvid_datasets.cpython-39.pyc -------------------------------------------------------------------------------- /koala/datasets/datasets/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/base_dataset.py -------------------------------------------------------------------------------- /koala/datasets/datasets/caption_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/caption_datasets.py -------------------------------------------------------------------------------- /koala/datasets/datasets/cc_sbu_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/cc_sbu_dataset.py -------------------------------------------------------------------------------- /koala/datasets/datasets/dataloader_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/dataloader_utils.py -------------------------------------------------------------------------------- /koala/datasets/datasets/laion_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/laion_dataset.py -------------------------------------------------------------------------------- /koala/datasets/datasets/llava_instruct_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/llava_instruct_dataset.py -------------------------------------------------------------------------------- /koala/datasets/datasets/video_instruct_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/video_instruct_dataset.py -------------------------------------------------------------------------------- /koala/datasets/datasets/webvid_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/datasets/datasets/webvid_datasets.py -------------------------------------------------------------------------------- /koala/models/ImageBind/.assets/bird_audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/.assets/bird_audio.wav -------------------------------------------------------------------------------- /koala/models/ImageBind/.assets/bird_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/.assets/bird_image.jpg -------------------------------------------------------------------------------- /koala/models/ImageBind/.assets/car_audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/.assets/car_audio.wav -------------------------------------------------------------------------------- /koala/models/ImageBind/.assets/car_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/.assets/car_image.jpg -------------------------------------------------------------------------------- /koala/models/ImageBind/.assets/dog_audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/.assets/dog_audio.wav -------------------------------------------------------------------------------- /koala/models/ImageBind/.assets/dog_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/.assets/dog_image.jpg -------------------------------------------------------------------------------- /koala/models/ImageBind/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /koala/models/ImageBind/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/CONTRIBUTING.md -------------------------------------------------------------------------------- /koala/models/ImageBind/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/LICENSE -------------------------------------------------------------------------------- /koala/models/ImageBind/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/README.md -------------------------------------------------------------------------------- /koala/models/ImageBind/__pycache__/data.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/__pycache__/data.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /koala/models/ImageBind/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/data.py -------------------------------------------------------------------------------- /koala/models/ImageBind/model_card.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/model_card.md -------------------------------------------------------------------------------- /koala/models/ImageBind/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /koala/models/ImageBind/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/ImageBind/models/__pycache__/helpers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/__pycache__/helpers.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/ImageBind/models/__pycache__/imagebind_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/__pycache__/imagebind_model.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/ImageBind/models/__pycache__/multimodal_preprocessors.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/__pycache__/multimodal_preprocessors.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/ImageBind/models/__pycache__/transformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/__pycache__/transformer.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/ImageBind/models/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/helpers.py -------------------------------------------------------------------------------- /koala/models/ImageBind/models/imagebind_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/imagebind_model.py -------------------------------------------------------------------------------- /koala/models/ImageBind/models/multimodal_preprocessors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/multimodal_preprocessors.py -------------------------------------------------------------------------------- /koala/models/ImageBind/models/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/models/transformer.py -------------------------------------------------------------------------------- /koala/models/ImageBind/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/ImageBind/requirements.txt -------------------------------------------------------------------------------- /koala/models/Qformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/Qformer.py -------------------------------------------------------------------------------- /koala/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__init__.py -------------------------------------------------------------------------------- /koala/models/__pycache__/Qformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/Qformer.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/base_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/base_model.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/blip2.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/blip2.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/eva_vit.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/eva_vit.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/modeling_llama.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/modeling_llama.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/moviechat.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/moviechat.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/video_agg_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/video_agg_model.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/video_goal_inference_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/video_goal_inference_model.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/video_instruction_ft_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/video_instruction_ft_model.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/video_llama.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/video_llama.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/__pycache__/video_llama_captioning.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/__pycache__/video_llama_captioning.cpython-39.pyc -------------------------------------------------------------------------------- /koala/models/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/base_model.py -------------------------------------------------------------------------------- /koala/models/blip2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/blip2.py -------------------------------------------------------------------------------- /koala/models/blip2_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/blip2_outputs.py -------------------------------------------------------------------------------- /koala/models/eva_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/eva_vit.py -------------------------------------------------------------------------------- /koala/models/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/modeling_llama.py -------------------------------------------------------------------------------- /koala/models/video_agg_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/models/video_agg_model.py -------------------------------------------------------------------------------- /koala/processors/.ipynb_checkpoints/video_processor-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/.ipynb_checkpoints/video_processor-checkpoint.py -------------------------------------------------------------------------------- /koala/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/__init__.py -------------------------------------------------------------------------------- /koala/processors/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/processors/__pycache__/base_processor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/__pycache__/base_processor.cpython-39.pyc -------------------------------------------------------------------------------- /koala/processors/__pycache__/blip_processors.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/__pycache__/blip_processors.cpython-39.pyc -------------------------------------------------------------------------------- /koala/processors/__pycache__/functional_video.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/__pycache__/functional_video.cpython-39.pyc -------------------------------------------------------------------------------- /koala/processors/__pycache__/randaugment.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/__pycache__/randaugment.cpython-39.pyc -------------------------------------------------------------------------------- /koala/processors/__pycache__/transforms_video.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/__pycache__/transforms_video.cpython-39.pyc -------------------------------------------------------------------------------- /koala/processors/__pycache__/video_processor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/__pycache__/video_processor.cpython-39.pyc -------------------------------------------------------------------------------- /koala/processors/base_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/base_processor.py -------------------------------------------------------------------------------- /koala/processors/blip_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/blip_processors.py -------------------------------------------------------------------------------- /koala/processors/functional_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/functional_video.py -------------------------------------------------------------------------------- /koala/processors/randaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/randaugment.py -------------------------------------------------------------------------------- /koala/processors/transforms_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/transforms_video.py -------------------------------------------------------------------------------- /koala/processors/video_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/processors/video_processor.py -------------------------------------------------------------------------------- /koala/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/runners/__init__.py -------------------------------------------------------------------------------- /koala/runners/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/runners/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/runners/__pycache__/runner_base.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/runners/__pycache__/runner_base.cpython-39.pyc -------------------------------------------------------------------------------- /koala/runners/runner_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/runners/runner_base.py -------------------------------------------------------------------------------- /koala/runners/test.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /koala/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/tasks/__init__.py -------------------------------------------------------------------------------- /koala/tasks/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/tasks/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /koala/tasks/__pycache__/base_task.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/tasks/__pycache__/base_task.cpython-39.pyc -------------------------------------------------------------------------------- /koala/tasks/__pycache__/image_text_pretrain.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/tasks/__pycache__/image_text_pretrain.cpython-39.pyc -------------------------------------------------------------------------------- /koala/tasks/__pycache__/video_text_pretrain.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/tasks/__pycache__/video_text_pretrain.cpython-39.pyc -------------------------------------------------------------------------------- /koala/tasks/base_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/tasks/base_task.py -------------------------------------------------------------------------------- /koala/tasks/image_text_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/tasks/image_text_pretrain.py -------------------------------------------------------------------------------- /koala/tasks/video_text_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/koala/tasks/video_text_pretrain.py -------------------------------------------------------------------------------- /preprocessing_scripts/extract_video_frames.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/preprocessing_scripts/extract_video_frames.py -------------------------------------------------------------------------------- /prompts/alignment_image.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/prompts/alignment_image.txt -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/requirement.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/setup.py -------------------------------------------------------------------------------- /train_configs/conversation_demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/train_configs/conversation_demo.yaml -------------------------------------------------------------------------------- /train_configs/video_aggregation_finetune.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/train_configs/video_aggregation_finetune.yaml -------------------------------------------------------------------------------- /train_video_agg_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/train_video_agg_model.py -------------------------------------------------------------------------------- /video_agg_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rxtan2/Koala-video-llm/HEAD/video_agg_dataloader.py --------------------------------------------------------------------------------