├── Dataset.md ├── README.md ├── RUN_VideoGLaMM.md ├── Training.md ├── VideoGLaMM ├── .DS_Store ├── .gitignore ├── LICENSE ├── chat.py ├── eval_anet_entities_infer.py ├── eval_gcg_infer.py ├── eval_gcg_metrics.py ├── eval_grounding.py ├── eval_mevis.py ├── eval_referdavis_infer.py ├── eval_referdavis_metrics.py ├── gcg_data_gen │ ├── .DS_Store │ ├── anet_entities_gcg │ │ ├── 1_dev_anet_entities_for_gcg.py │ │ ├── 2_anet_entities_gcg_openai_refine.py │ │ └── 3_anet_entities_gcg_extract_masks.py │ ├── burst_ytvis_gcg │ │ ├── README.md │ │ ├── generate_annotations.py │ │ ├── generation.py │ │ ├── merge_b_y.py │ │ └── requirements.txt │ ├── dev_dataset_visualize.py │ ├── hcstvg_gcg │ │ ├── dev_hcstvg_2_gcg_captions.py │ │ └── dev_hcstvg_2_mask_gen.py │ ├── mevis_gcg │ │ └── dev_mevis_gcg.py │ ├── vidstg_gcg │ │ ├── dev_vidstg_gcg_captions.py │ │ └── dev_vidstg_gcg_mask_gen.py │ └── ytvos_gcg │ │ └── dev_ytvos_gcg.py ├── model │ ├── .DS_Store │ ├── VideoGLaMM.py │ ├── chatunivi │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── conversation.py │ │ ├── mm_utils.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── arch.py │ │ │ ├── builder.py │ │ │ ├── cluster.py │ │ │ ├── language_model │ │ │ │ └── llama.py │ │ │ └── multimodal_encoder │ │ │ │ ├── builder.py │ │ │ │ └── clip_encoder.py │ │ └── utils.py │ ├── llava │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── conversation.py │ │ ├── mm_utils.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── apply_delta.py │ │ │ ├── builder.py │ │ │ ├── consolidate.py │ │ │ ├── language_model │ │ │ │ ├── llava_llama.py │ │ │ │ ├── llava_mpt.py │ │ │ │ └── mpt │ │ │ │ │ ├── adapt_tokenizer.py │ │ │ │ │ ├── attention.py │ │ │ │ │ ├── blocks.py │ │ │ │ │ ├── configuration_mpt.py │ │ │ │ │ ├── custom_embedding.py │ │ │ │ │ ├── flash_attn_triton.py │ │ │ │ │ ├── hf_prefixlm_converter.py │ │ │ │ │ ├── meta_init_context.py │ │ │ │ │ ├── modeling_mpt.py │ │ │ │ │ ├── norm.py │ │ │ │ │ └── param_init_fns.py │ │ │ ├── llava_arch.py │ │ │ ├── make_delta.py │ │ │ ├── multimodal_encoder │ │ │ │ ├── builder.py │ │ │ │ └── clip_encoder.py │ │ │ └── utils.py │ │ ├── train │ │ │ ├── llama_flash_attn_monkey_patch.py │ │ │ ├── llava_trainer.py │ │ │ ├── train.py │ │ │ └── train_mem.py │ │ └── utils.py │ ├── segment_anything │ │ ├── __init__.py │ │ ├── automatic_mask_generator.py │ │ ├── build_sam.py │ │ ├── modeling │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── image_encoder.py │ │ │ ├── mask_decoder.py │ │ │ ├── prompt_encoder.py │ │ │ ├── sam.py │ │ │ └── transformer.py │ │ ├── predictor.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── amg.py │ │ │ ├── onnx.py │ │ │ └── transforms.py │ ├── segment_anything_2 │ │ ├── sam2 │ │ │ ├── __init__.py │ │ │ ├── automatic_mask_generator.py │ │ │ ├── build_sam.py │ │ │ ├── csrc │ │ │ │ └── connected_components.cu │ │ │ ├── modeling │ │ │ │ ├── __init__.py │ │ │ │ ├── backbones │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── hieradet.py │ │ │ │ │ ├── image_encoder.py │ │ │ │ │ └── utils.py │ │ │ │ ├── memory_attention.py │ │ │ │ ├── memory_encoder.py │ │ │ │ ├── position_encoding.py │ │ │ │ ├── sam │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── mask_decoder.py │ │ │ │ │ ├── prompt_encoder.py │ │ │ │ │ └── transformer.py │ │ │ │ ├── sam2_base.py │ │ │ │ └── sam2_utils.py │ │ │ ├── sam2_image_predictor.py │ │ │ ├── sam2_video_predictor.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── amg.py │ │ │ │ ├── misc.py │ │ │ │ └── transforms.py │ │ ├── sam2_configs │ │ │ ├── __init__.py │ │ │ ├── sam2_hiera_b+.yaml │ │ │ ├── sam2_hiera_l.yaml │ │ │ ├── sam2_hiera_s.yaml │ │ │ └── sam2_hiera_t.yaml │ │ └── setup.py │ └── videogpt_plus │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── conversation.py │ │ ├── mm_utils.py │ │ └── model │ │ ├── __init__.py │ │ ├── arch.py │ │ ├── builder.py │ │ ├── dataloader.py │ │ ├── internvideo │ │ ├── build_internvideo.py │ │ ├── config.py │ │ ├── easydict.py │ │ ├── flash_attention_class.py │ │ ├── internvideo2.py │ │ ├── internvideo2_stage2_config_vision.py │ │ ├── pos_embed.py │ │ └── utils.py │ │ ├── language_model │ │ ├── llama3_1.py │ │ └── phi3.py │ │ ├── multimodal_encoder │ │ ├── builder.py │ │ ├── clip_encoder.py │ │ └── processor.py │ │ └── multimodal_projector │ │ └── builder.py ├── requirements.txt ├── train_ds_with_videogptplus.py └── utils │ ├── .DS_Store │ ├── __init__.py │ ├── ade20k_classes.json │ ├── clair.py │ ├── cocostuff_classes.txt │ ├── conv_generator.py │ ├── conversation.py │ ├── data_processing.py │ ├── dataset.py │ ├── enc_preprocessors.py │ ├── grandf_dataset.py │ ├── grefcoco.py │ ├── grefer.py │ ├── grounded_video_qa.py │ ├── grounding_utils │ ├── __init__.py │ ├── box_ops.py │ ├── image_transforms.py │ └── misc.py │ ├── hcstvg_dataset.py │ ├── itm_transforms.py │ ├── mevis_dataset.py │ ├── mevis_gcg.py │ ├── misc.py │ ├── ordered_datasets │ ├── ordered_mevis.py │ └── ordered_rvos.py │ ├── preproc_hcstvgv2.py │ ├── preproc_vidstg.py │ ├── reason_seg_dataset.py │ ├── refer.py │ ├── refer_datasets │ ├── __init__.py │ ├── a2d.py │ ├── box_ops.py │ ├── davis.py │ ├── jhmdb.py │ ├── mevis.py │ ├── new │ │ ├── davis17.py │ │ └── ytvos.py │ ├── transforms.py │ └── ytvos.py │ ├── refer_seg_dataset.py │ ├── refer_vos_dataset.py │ ├── sam_transforms.py │ ├── sem_seg_dataset.py │ ├── temporal_grounding_datasets.py │ ├── trainer.py │ ├── utils.py │ ├── video_gcg_anet.py │ ├── video_gcg_dataset.py │ ├── video_vqa_dataset.py │ ├── vidstg_dataset.py │ ├── vidstg_hcstvg_gcg.py │ ├── vqa_dataset.py │ └── ytvos_gcg.py └── docs └── images ├── .DS_Store ├── figures ├── cvpr25-teaser.png ├── cvpr25_main_block_diagram-jpg.jpg ├── cvpr25_qualitative.png └── videoglamm_annotation_pipeline.png └── logos ├── IVAL_logo.png ├── MBZUAI_logo.png ├── Oryx_logo.png └── logo-videoglamm.png /Dataset.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/Dataset.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/README.md -------------------------------------------------------------------------------- /RUN_VideoGLaMM.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/RUN_VideoGLaMM.md -------------------------------------------------------------------------------- /Training.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/Training.md -------------------------------------------------------------------------------- /VideoGLaMM/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/.DS_Store -------------------------------------------------------------------------------- /VideoGLaMM/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/.gitignore -------------------------------------------------------------------------------- /VideoGLaMM/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/LICENSE -------------------------------------------------------------------------------- /VideoGLaMM/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/chat.py -------------------------------------------------------------------------------- /VideoGLaMM/eval_anet_entities_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/eval_anet_entities_infer.py -------------------------------------------------------------------------------- /VideoGLaMM/eval_gcg_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/eval_gcg_infer.py -------------------------------------------------------------------------------- /VideoGLaMM/eval_gcg_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/eval_gcg_metrics.py -------------------------------------------------------------------------------- /VideoGLaMM/eval_grounding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/eval_grounding.py -------------------------------------------------------------------------------- /VideoGLaMM/eval_mevis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/eval_mevis.py -------------------------------------------------------------------------------- /VideoGLaMM/eval_referdavis_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/eval_referdavis_infer.py -------------------------------------------------------------------------------- /VideoGLaMM/eval_referdavis_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/eval_referdavis_metrics.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/.DS_Store -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/anet_entities_gcg/1_dev_anet_entities_for_gcg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/anet_entities_gcg/1_dev_anet_entities_for_gcg.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/anet_entities_gcg/2_anet_entities_gcg_openai_refine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/anet_entities_gcg/2_anet_entities_gcg_openai_refine.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/anet_entities_gcg/3_anet_entities_gcg_extract_masks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/anet_entities_gcg/3_anet_entities_gcg_extract_masks.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/README.md -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/generate_annotations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/generate_annotations.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/generation.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/merge_b_y.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/merge_b_y.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/burst_ytvis_gcg/requirements.txt: -------------------------------------------------------------------------------- 1 | pillow==10.3.0 2 | pycocotools==2.0.6 3 | google-generativeai -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/dev_dataset_visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/dev_dataset_visualize.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/hcstvg_gcg/dev_hcstvg_2_gcg_captions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/hcstvg_gcg/dev_hcstvg_2_gcg_captions.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/hcstvg_gcg/dev_hcstvg_2_mask_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/hcstvg_gcg/dev_hcstvg_2_mask_gen.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/mevis_gcg/dev_mevis_gcg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/mevis_gcg/dev_mevis_gcg.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/vidstg_gcg/dev_vidstg_gcg_captions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/vidstg_gcg/dev_vidstg_gcg_captions.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/vidstg_gcg/dev_vidstg_gcg_mask_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/vidstg_gcg/dev_vidstg_gcg_mask_gen.py -------------------------------------------------------------------------------- /VideoGLaMM/gcg_data_gen/ytvos_gcg/dev_ytvos_gcg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/gcg_data_gen/ytvos_gcg/dev_ytvos_gcg.py -------------------------------------------------------------------------------- /VideoGLaMM/model/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/.DS_Store -------------------------------------------------------------------------------- /VideoGLaMM/model/VideoGLaMM.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/VideoGLaMM.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/constants.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/conversation.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/mm_utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/model/arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/model/arch.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/model/builder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/model/cluster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/model/cluster.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/model/language_model/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/model/language_model/llama.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/model/multimodal_encoder/builder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/model/multimodal_encoder/clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/model/multimodal_encoder/clip_encoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/chatunivi/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/chatunivi/utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import LlavaLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/constants.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/conversation.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/mm_utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/apply_delta.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/builder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/consolidate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/consolidate.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/llava_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/llava_llama.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/llava_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/llava_mpt.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/adapt_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/adapt_tokenizer.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/attention.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/blocks.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/configuration_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/configuration_mpt.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/custom_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/custom_embedding.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/flash_attn_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/flash_attn_triton.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/hf_prefixlm_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/hf_prefixlm_converter.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/meta_init_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/meta_init_context.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/modeling_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/modeling_mpt.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/norm.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/language_model/mpt/param_init_fns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/language_model/mpt/param_init_fns.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/llava_arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/llava_arch.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/make_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/make_delta.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/multimodal_encoder/builder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/multimodal_encoder/clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/multimodal_encoder/clip_encoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/model/utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/train/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/train/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/train/llava_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/train/llava_trainer.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/train/train.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/train/train_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/train/train_mem.py -------------------------------------------------------------------------------- /VideoGLaMM/model/llava/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/llava/utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/automatic_mask_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/automatic_mask_generator.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/build_sam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/build_sam.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/modeling/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/modeling/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/modeling/common.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/modeling/image_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/modeling/image_encoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/modeling/mask_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/modeling/mask_decoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/modeling/prompt_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/modeling/prompt_encoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/modeling/sam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/modeling/sam.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/modeling/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/modeling/transformer.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/predictor.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/utils/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/utils/amg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/utils/amg.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/utils/onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/utils/onnx.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything/utils/transforms.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/automatic_mask_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/automatic_mask_generator.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/build_sam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/build_sam.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/csrc/connected_components.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/csrc/connected_components.cu -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/backbones/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/backbones/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/backbones/hieradet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/backbones/hieradet.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/backbones/image_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/backbones/image_encoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/backbones/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/backbones/utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/memory_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/memory_attention.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/memory_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/memory_encoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/position_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/position_encoding.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/sam/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/sam/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/sam/mask_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/sam/mask_decoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/sam/prompt_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/sam/prompt_encoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/sam/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/sam/transformer.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/sam2_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/sam2_base.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/modeling/sam2_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/modeling/sam2_utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/sam2_image_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/sam2_image_predictor.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/sam2_video_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/sam2_video_predictor.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/utils/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/utils/amg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/utils/amg.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/utils/misc.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2/utils/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2/utils/transforms.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2_configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2_configs/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2_configs/sam2_hiera_b+.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2_configs/sam2_hiera_b+.yaml -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2_configs/sam2_hiera_l.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2_configs/sam2_hiera_l.yaml -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2_configs/sam2_hiera_s.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2_configs/sam2_hiera_s.yaml -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/sam2_configs/sam2_hiera_t.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/sam2_configs/sam2_hiera_t.yaml -------------------------------------------------------------------------------- /VideoGLaMM/model/segment_anything_2/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/segment_anything_2/setup.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/constants.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/conversation.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/mm_utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/__init__.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/arch.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/builder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/dataloader.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/internvideo/build_internvideo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/internvideo/build_internvideo.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/internvideo/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/internvideo/config.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/internvideo/easydict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/internvideo/easydict.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/internvideo/flash_attention_class.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/internvideo/flash_attention_class.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/internvideo/internvideo2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/internvideo/internvideo2.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/internvideo/internvideo2_stage2_config_vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/internvideo/internvideo2_stage2_config_vision.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/internvideo/pos_embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/internvideo/pos_embed.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/internvideo/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/internvideo/utils.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/language_model/llama3_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/language_model/llama3_1.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/language_model/phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/language_model/phi3.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/multimodal_encoder/builder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/multimodal_encoder/clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/multimodal_encoder/clip_encoder.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/multimodal_encoder/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/multimodal_encoder/processor.py -------------------------------------------------------------------------------- /VideoGLaMM/model/videogpt_plus/model/multimodal_projector/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/model/videogpt_plus/model/multimodal_projector/builder.py -------------------------------------------------------------------------------- /VideoGLaMM/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/requirements.txt -------------------------------------------------------------------------------- /VideoGLaMM/train_ds_with_videogptplus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/train_ds_with_videogptplus.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/.DS_Store -------------------------------------------------------------------------------- /VideoGLaMM/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VideoGLaMM/utils/ade20k_classes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/ade20k_classes.json -------------------------------------------------------------------------------- /VideoGLaMM/utils/clair.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/clair.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/cocostuff_classes.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/cocostuff_classes.txt -------------------------------------------------------------------------------- /VideoGLaMM/utils/conv_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/conv_generator.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/conversation.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/data_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/data_processing.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/enc_preprocessors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/enc_preprocessors.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/grandf_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/grandf_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/grefcoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/grefcoco.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/grefer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/grefer.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/grounded_video_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/grounded_video_qa.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/grounding_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VideoGLaMM/utils/grounding_utils/box_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/grounding_utils/box_ops.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/grounding_utils/image_transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/grounding_utils/image_transforms.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/grounding_utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/grounding_utils/misc.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/hcstvg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/hcstvg_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/itm_transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/itm_transforms.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/mevis_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/mevis_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/mevis_gcg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/mevis_gcg.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/misc.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/ordered_datasets/ordered_mevis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/ordered_datasets/ordered_mevis.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/ordered_datasets/ordered_rvos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/ordered_datasets/ordered_rvos.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/preproc_hcstvgv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/preproc_hcstvgv2.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/preproc_vidstg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/preproc_vidstg.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/reason_seg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/reason_seg_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/a2d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/a2d.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/box_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/box_ops.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/davis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/davis.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/jhmdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/jhmdb.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/mevis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/mevis.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/new/davis17.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/new/davis17.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/new/ytvos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/new/ytvos.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/transforms.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_datasets/ytvos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_datasets/ytvos.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_seg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_seg_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/refer_vos_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/refer_vos_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/sam_transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/sam_transforms.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/sem_seg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/sem_seg_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/temporal_grounding_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/temporal_grounding_datasets.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/trainer.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/utils.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/video_gcg_anet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/video_gcg_anet.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/video_gcg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/video_gcg_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/video_vqa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/video_vqa_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/vidstg_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/vidstg_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/vidstg_hcstvg_gcg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/vidstg_hcstvg_gcg.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/vqa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/vqa_dataset.py -------------------------------------------------------------------------------- /VideoGLaMM/utils/ytvos_gcg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/VideoGLaMM/utils/ytvos_gcg.py -------------------------------------------------------------------------------- /docs/images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/.DS_Store -------------------------------------------------------------------------------- /docs/images/figures/cvpr25-teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/figures/cvpr25-teaser.png -------------------------------------------------------------------------------- /docs/images/figures/cvpr25_main_block_diagram-jpg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/figures/cvpr25_main_block_diagram-jpg.jpg -------------------------------------------------------------------------------- /docs/images/figures/cvpr25_qualitative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/figures/cvpr25_qualitative.png -------------------------------------------------------------------------------- /docs/images/figures/videoglamm_annotation_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/figures/videoglamm_annotation_pipeline.png -------------------------------------------------------------------------------- /docs/images/logos/IVAL_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/logos/IVAL_logo.png -------------------------------------------------------------------------------- /docs/images/logos/MBZUAI_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/logos/MBZUAI_logo.png -------------------------------------------------------------------------------- /docs/images/logos/Oryx_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/logos/Oryx_logo.png -------------------------------------------------------------------------------- /docs/images/logos/logo-videoglamm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbzuai-oryx/VideoGLaMM/HEAD/docs/images/logos/logo-videoglamm.png --------------------------------------------------------------------------------