├── .github ├── logo.png └── method.png ├── .gitignore ├── LICENSE ├── README.md ├── demo ├── app.py └── assets │ ├── bot.png │ └── user.png ├── docs ├── BENCHMARK.md ├── DEMO.md ├── EVAL.md └── TRAIN.md ├── requirements.txt ├── scripts ├── evaluation │ ├── eval_auto_2b.sh │ ├── eval_auto_7b.sh │ ├── eval_dist_auto_2b.sh │ ├── eval_dist_auto_7b.sh │ └── eval_qvhighlights.sh ├── finetune │ ├── finetune_qvhighlights_2b.sh │ └── finetune_qvhighlights_7b.sh ├── pretrain │ ├── pretrain_grounder_2b.sh │ ├── pretrain_grounder_7b.sh │ ├── pretrain_planner_2b.sh │ ├── pretrain_planner_7b.sh │ ├── pretrain_verifier_2b.sh │ └── pretrain_verifier_7b.sh ├── zero2.json ├── zero2_offload.json ├── zero3.json └── zero3_offload.json ├── setup.cfg └── videomind ├── constants.py ├── conversation.py ├── dataset ├── __init__.py ├── collator.py ├── hybrid.py ├── sub_classes │ ├── __init__.py │ ├── activitynet_captions.py │ ├── activitynet_rtl.py │ ├── cgbench.py │ ├── charades_sta.py │ ├── cosmo_cap.py │ ├── didemo.py │ ├── ego4d_naq.py │ ├── ego4d_nlq.py │ ├── ego_timeqa.py │ ├── hirest.py │ ├── internvit_vtime.py │ ├── longvideobench.py │ ├── lvbench.py │ ├── mlvu.py │ ├── mvbench.py │ ├── nextgqa.py │ ├── nextqa.py │ ├── qa_ego4d.py │ ├── queryd.py │ ├── qvhighlights.py │ ├── rextime.py │ ├── star.py │ ├── tacos.py │ ├── vid_morp.py │ ├── videomme.py │ ├── videoxum.py │ └── youcook2.py ├── utils.py └── wrappers │ ├── __init__.py │ ├── answering.py │ ├── grounding.py │ ├── planning.py │ └── verifying.py ├── eval ├── eval_auto.py ├── eval_qvhighlights.py ├── infer_auto.py └── infer_qvhighlights.py ├── model ├── __init__.py ├── blocks.py ├── builder.py ├── generator.py ├── loss.py └── model.py ├── train ├── custom_trainer.py └── train.py └── utils ├── io.py └── parser.py /.github/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/.github/logo.png -------------------------------------------------------------------------------- /.github/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/.github/method.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/README.md -------------------------------------------------------------------------------- /demo/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/demo/app.py -------------------------------------------------------------------------------- /demo/assets/bot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/demo/assets/bot.png -------------------------------------------------------------------------------- /demo/assets/user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/demo/assets/user.png -------------------------------------------------------------------------------- /docs/BENCHMARK.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/docs/BENCHMARK.md -------------------------------------------------------------------------------- /docs/DEMO.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/docs/DEMO.md -------------------------------------------------------------------------------- /docs/EVAL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/docs/EVAL.md -------------------------------------------------------------------------------- /docs/TRAIN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/docs/TRAIN.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/evaluation/eval_auto_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/evaluation/eval_auto_2b.sh -------------------------------------------------------------------------------- /scripts/evaluation/eval_auto_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/evaluation/eval_auto_7b.sh -------------------------------------------------------------------------------- /scripts/evaluation/eval_dist_auto_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/evaluation/eval_dist_auto_2b.sh -------------------------------------------------------------------------------- /scripts/evaluation/eval_dist_auto_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/evaluation/eval_dist_auto_7b.sh -------------------------------------------------------------------------------- /scripts/evaluation/eval_qvhighlights.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/evaluation/eval_qvhighlights.sh -------------------------------------------------------------------------------- /scripts/finetune/finetune_qvhighlights_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/finetune/finetune_qvhighlights_2b.sh -------------------------------------------------------------------------------- /scripts/finetune/finetune_qvhighlights_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/finetune/finetune_qvhighlights_7b.sh -------------------------------------------------------------------------------- /scripts/pretrain/pretrain_grounder_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/pretrain/pretrain_grounder_2b.sh -------------------------------------------------------------------------------- /scripts/pretrain/pretrain_grounder_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/pretrain/pretrain_grounder_7b.sh -------------------------------------------------------------------------------- /scripts/pretrain/pretrain_planner_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/pretrain/pretrain_planner_2b.sh -------------------------------------------------------------------------------- /scripts/pretrain/pretrain_planner_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/pretrain/pretrain_planner_7b.sh -------------------------------------------------------------------------------- /scripts/pretrain/pretrain_verifier_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/pretrain/pretrain_verifier_2b.sh -------------------------------------------------------------------------------- /scripts/pretrain/pretrain_verifier_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/pretrain/pretrain_verifier_7b.sh -------------------------------------------------------------------------------- /scripts/zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/zero2.json -------------------------------------------------------------------------------- /scripts/zero2_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/zero2_offload.json -------------------------------------------------------------------------------- /scripts/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/zero3.json -------------------------------------------------------------------------------- /scripts/zero3_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/scripts/zero3_offload.json -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/setup.cfg -------------------------------------------------------------------------------- /videomind/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/constants.py -------------------------------------------------------------------------------- /videomind/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/conversation.py -------------------------------------------------------------------------------- /videomind/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/__init__.py -------------------------------------------------------------------------------- /videomind/dataset/collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/collator.py -------------------------------------------------------------------------------- /videomind/dataset/hybrid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/hybrid.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/__init__.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/activitynet_captions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/activitynet_captions.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/activitynet_rtl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/activitynet_rtl.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/cgbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/cgbench.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/charades_sta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/charades_sta.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/cosmo_cap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/cosmo_cap.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/didemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/didemo.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/ego4d_naq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/ego4d_naq.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/ego4d_nlq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/ego4d_nlq.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/ego_timeqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/ego_timeqa.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/hirest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/hirest.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/internvit_vtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/internvit_vtime.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/longvideobench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/longvideobench.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/lvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/lvbench.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/mlvu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/mlvu.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/mvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/mvbench.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/nextgqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/nextgqa.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/nextqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/nextqa.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/qa_ego4d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/qa_ego4d.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/queryd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/queryd.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/qvhighlights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/qvhighlights.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/rextime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/rextime.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/star.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/star.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/tacos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/tacos.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/vid_morp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/vid_morp.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/videomme.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/videomme.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/videoxum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/videoxum.py -------------------------------------------------------------------------------- /videomind/dataset/sub_classes/youcook2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/sub_classes/youcook2.py -------------------------------------------------------------------------------- /videomind/dataset/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/utils.py -------------------------------------------------------------------------------- /videomind/dataset/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/wrappers/__init__.py -------------------------------------------------------------------------------- /videomind/dataset/wrappers/answering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/wrappers/answering.py -------------------------------------------------------------------------------- /videomind/dataset/wrappers/grounding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/wrappers/grounding.py -------------------------------------------------------------------------------- /videomind/dataset/wrappers/planning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/wrappers/planning.py -------------------------------------------------------------------------------- /videomind/dataset/wrappers/verifying.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/dataset/wrappers/verifying.py -------------------------------------------------------------------------------- /videomind/eval/eval_auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/eval/eval_auto.py -------------------------------------------------------------------------------- /videomind/eval/eval_qvhighlights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/eval/eval_qvhighlights.py -------------------------------------------------------------------------------- /videomind/eval/infer_auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/eval/infer_auto.py -------------------------------------------------------------------------------- /videomind/eval/infer_qvhighlights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/eval/infer_qvhighlights.py -------------------------------------------------------------------------------- /videomind/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/model/__init__.py -------------------------------------------------------------------------------- /videomind/model/blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/model/blocks.py -------------------------------------------------------------------------------- /videomind/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/model/builder.py -------------------------------------------------------------------------------- /videomind/model/generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/model/generator.py -------------------------------------------------------------------------------- /videomind/model/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/model/loss.py -------------------------------------------------------------------------------- /videomind/model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/model/model.py -------------------------------------------------------------------------------- /videomind/train/custom_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/train/custom_trainer.py -------------------------------------------------------------------------------- /videomind/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/train/train.py -------------------------------------------------------------------------------- /videomind/utils/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/utils/io.py -------------------------------------------------------------------------------- /videomind/utils/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeliudev/VideoMind/HEAD/videomind/utils/parser.py --------------------------------------------------------------------------------