├── README.md ├── configs ├── sample_config.yaml ├── zero2.json └── zero3.json ├── data ├── __pycache__ │ ├── video_llm_data.cpython-310.pyc │ ├── video_llm_data.cpython-311.pyc │ └── video_llm_data.cpython-39.pyc ├── processors │ ├── box_processor.py │ ├── dpo_processor.py │ ├── image_transform.py │ ├── online_vqa_processor.py │ ├── templates │ │ ├── rec_templates.txt │ │ ├── reg_templates.txt │ │ ├── sot_templates.txt │ │ ├── videorec_templates.txt │ │ └── videoreg_templates.txt │ ├── vision_processor.py │ └── vqa_processor.py ├── sampler │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── group_sampler.cpython-311.pyc │ │ └── group_sampler.cpython-39.pyc │ └── group_sampler.py └── video_llm_data.py ├── eval └── eval.py ├── models ├── adapter │ ├── __init__.py │ ├── dynamic_spatial_pooling.py │ └── projector.py └── video_llm.py └── utils └── io.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/README.md -------------------------------------------------------------------------------- /configs/sample_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/configs/sample_config.yaml -------------------------------------------------------------------------------- /configs/zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/configs/zero2.json -------------------------------------------------------------------------------- /configs/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/configs/zero3.json -------------------------------------------------------------------------------- /data/__pycache__/video_llm_data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/__pycache__/video_llm_data.cpython-310.pyc -------------------------------------------------------------------------------- /data/__pycache__/video_llm_data.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/__pycache__/video_llm_data.cpython-311.pyc -------------------------------------------------------------------------------- /data/__pycache__/video_llm_data.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/__pycache__/video_llm_data.cpython-39.pyc -------------------------------------------------------------------------------- /data/processors/box_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/box_processor.py -------------------------------------------------------------------------------- /data/processors/dpo_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/dpo_processor.py -------------------------------------------------------------------------------- /data/processors/image_transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/image_transform.py -------------------------------------------------------------------------------- /data/processors/online_vqa_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/online_vqa_processor.py -------------------------------------------------------------------------------- /data/processors/templates/rec_templates.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/templates/rec_templates.txt -------------------------------------------------------------------------------- /data/processors/templates/reg_templates.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/templates/reg_templates.txt -------------------------------------------------------------------------------- /data/processors/templates/sot_templates.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/templates/sot_templates.txt -------------------------------------------------------------------------------- /data/processors/templates/videorec_templates.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/templates/videorec_templates.txt -------------------------------------------------------------------------------- /data/processors/templates/videoreg_templates.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/templates/videoreg_templates.txt -------------------------------------------------------------------------------- /data/processors/vision_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/vision_processor.py -------------------------------------------------------------------------------- /data/processors/vqa_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/processors/vqa_processor.py -------------------------------------------------------------------------------- /data/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/sampler/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/sampler/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /data/sampler/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/sampler/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /data/sampler/__pycache__/group_sampler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/sampler/__pycache__/group_sampler.cpython-311.pyc -------------------------------------------------------------------------------- /data/sampler/__pycache__/group_sampler.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/sampler/__pycache__/group_sampler.cpython-39.pyc -------------------------------------------------------------------------------- /data/sampler/group_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/sampler/group_sampler.py -------------------------------------------------------------------------------- /data/video_llm_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/data/video_llm_data.py -------------------------------------------------------------------------------- /eval/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/eval/eval.py -------------------------------------------------------------------------------- /models/adapter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/models/adapter/__init__.py -------------------------------------------------------------------------------- /models/adapter/dynamic_spatial_pooling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/models/adapter/dynamic_spatial_pooling.py -------------------------------------------------------------------------------- /models/adapter/projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/models/adapter/projector.py -------------------------------------------------------------------------------- /models/video_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/models/video_llm.py -------------------------------------------------------------------------------- /utils/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hon-Wong/ByteVideoLLM/HEAD/utils/io.py --------------------------------------------------------------------------------