├── .gitignore
├── README.md
├── datasets
    ├── chat
    │   └── base_template.py
    ├── mix_grounded.py
    ├── mix_pretrain.py
    └── mix_sft.py
├── experiments
    ├── _3klvlS4W7A.mp4
    └── video0.mp4
├── inference.py
├── mm_utils
    ├── utils.py
    └── video_utils.py
├── model.png
├── models
    ├── internvideo2.py
    ├── llava_next_video.py
    ├── modeling_clip.py
    ├── modeling_llama.py
    └── modeling_phi3.py
├── overwatch
    └── overwatch.py
├── requirements.txt
├── scripts
    ├── download_data_stage1.sh
    ├── download_data_stage2.sh
    ├── download_data_stage3.sh
    ├── inference_llama3.sh
    ├── inference_phi3_5.sh
    ├── phi3.5_grounded_8_a100.sh
    ├── phi3.5_pretrain_8_a100.sh
    └── phi3.5_sft_8_a100.sh
├── train.py
└── training
    ├── base_strategy.py
    ├── fsdp.py
    └── metrics.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/.gitignore


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/README.md


--------------------------------------------------------------------------------
/datasets/chat/base_template.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/datasets/chat/base_template.py


--------------------------------------------------------------------------------
/datasets/mix_grounded.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/datasets/mix_grounded.py


--------------------------------------------------------------------------------
/datasets/mix_pretrain.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/datasets/mix_pretrain.py


--------------------------------------------------------------------------------
/datasets/mix_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/datasets/mix_sft.py


--------------------------------------------------------------------------------
/experiments/_3klvlS4W7A.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/experiments/_3klvlS4W7A.mp4


--------------------------------------------------------------------------------
/experiments/video0.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/experiments/video0.mp4


--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/inference.py


--------------------------------------------------------------------------------
/mm_utils/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/mm_utils/utils.py


--------------------------------------------------------------------------------
/mm_utils/video_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/mm_utils/video_utils.py


--------------------------------------------------------------------------------
/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/model.png


--------------------------------------------------------------------------------
/models/internvideo2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/models/internvideo2.py


--------------------------------------------------------------------------------
/models/llava_next_video.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/models/llava_next_video.py


--------------------------------------------------------------------------------
/models/modeling_clip.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/models/modeling_clip.py


--------------------------------------------------------------------------------
/models/modeling_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/models/modeling_llama.py


--------------------------------------------------------------------------------
/models/modeling_phi3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/models/modeling_phi3.py


--------------------------------------------------------------------------------
/overwatch/overwatch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/overwatch/overwatch.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/requirements.txt


--------------------------------------------------------------------------------
/scripts/download_data_stage1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/scripts/download_data_stage1.sh


--------------------------------------------------------------------------------
/scripts/download_data_stage2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/scripts/download_data_stage2.sh


--------------------------------------------------------------------------------
/scripts/download_data_stage3.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/scripts/download_data_stage3.sh


--------------------------------------------------------------------------------
/scripts/inference_llama3.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/scripts/inference_llama3.sh


--------------------------------------------------------------------------------
/scripts/inference_phi3_5.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/scripts/inference_phi3_5.sh


--------------------------------------------------------------------------------
/scripts/phi3.5_grounded_8_a100.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/scripts/phi3.5_grounded_8_a100.sh


--------------------------------------------------------------------------------
/scripts/phi3.5_pretrain_8_a100.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/scripts/phi3.5_pretrain_8_a100.sh


--------------------------------------------------------------------------------
/scripts/phi3.5_sft_8_a100.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/scripts/phi3.5_sft_8_a100.sh


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/train.py


--------------------------------------------------------------------------------
/training/base_strategy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/training/base_strategy.py


--------------------------------------------------------------------------------
/training/fsdp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/training/fsdp.py


--------------------------------------------------------------------------------
/training/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WHB139426/Grounded-Video-LLM/HEAD/training/metrics.py


--------------------------------------------------------------------------------