├── .gitignore ├── README.md ├── assets ├── architecture_v3.jpeg └── teaser2.jpeg ├── baseline ├── datasets │ └── DATASET.md ├── eval_multihop_qa.sh ├── eval_rtl.sh ├── gelm │ ├── __init__.py │ ├── arguments.py │ ├── constants.py │ ├── dataset │ │ ├── __init__.py │ │ ├── base_dataset.py │ │ ├── hybrid_dataset.py │ │ ├── multihop_qa_dataset.py │ │ └── temporal_reasoning_dataset.py │ ├── eval │ │ ├── eval_gpt_review_rtl.py │ │ ├── inference_multihop_qa.py │ │ ├── inference_rtl.py │ │ ├── summarize_gpt_review.py │ │ └── table │ │ │ └── rule.txt │ ├── model │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── gelm_arch.py │ │ ├── grounding_head.py │ │ └── language_model │ │ │ └── gelm_llama.py │ ├── train │ │ ├── train.py │ │ └── train_mem.py │ └── utils.py ├── llava │ ├── __init__.py │ ├── constants.py │ ├── conversation.py │ ├── mm_utils.py │ ├── model │ │ ├── __init__.py │ │ ├── apply_delta.py │ │ ├── builder.py │ │ ├── consolidate.py │ │ ├── language_model │ │ │ ├── llava_llama.py │ │ │ ├── llava_mpt.py │ │ │ └── mpt │ │ │ │ ├── adapt_tokenizer.py │ │ │ │ ├── attention.py │ │ │ │ ├── blocks.py │ │ │ │ ├── configuration_mpt.py │ │ │ │ ├── custom_embedding.py │ │ │ │ ├── flash_attn_triton.py │ │ │ │ ├── hf_prefixlm_converter.py │ │ │ │ ├── meta_init_context.py │ │ │ │ ├── modeling_mpt.py │ │ │ │ ├── norm.py │ │ │ │ └── param_init_fns.py │ │ ├── llava_arch.py │ │ ├── make_delta.py │ │ ├── multimodal_encoder │ │ │ ├── builder.py │ │ │ └── clip_encoder.py │ │ └── utils.py │ ├── serve │ │ ├── __init__.py │ │ ├── cli.py │ │ ├── controller.py │ │ ├── examples │ │ │ ├── extreme_ironing.jpg │ │ │ └── waterview.jpg │ │ ├── gradio_web_server.py │ │ ├── model_worker.py │ │ ├── register_worker.py │ │ └── test_message.py │ ├── train │ │ ├── llama_flash_attn_monkey_patch.py │ │ ├── llava_trainer.py │ │ ├── train.py │ │ └── train_mem.py │ └── utils.py ├── pyproject.toml └── scripts │ ├── finetune_mixed.sh │ ├── finetune_multihop_qa.sh │ └── finetune_rtl.sh └── benchmark ├── metrics ├── answering_metrics.py ├── evaluate.sh ├── evaluate_answering.py └── evaluate_grounding.py └── zero-shot-inference ├── GPT-4o.py ├── InternVL-Chat.py ├── LLaVa-NeXT-Video.py ├── TimeChat.py ├── VTimeLLM.py ├── inference.sh └── pipeline ├── caption.py └── reason.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/README.md -------------------------------------------------------------------------------- /assets/architecture_v3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/assets/architecture_v3.jpeg -------------------------------------------------------------------------------- /assets/teaser2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/assets/teaser2.jpeg -------------------------------------------------------------------------------- /baseline/datasets/DATASET.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/datasets/DATASET.md -------------------------------------------------------------------------------- /baseline/eval_multihop_qa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/eval_multihop_qa.sh -------------------------------------------------------------------------------- /baseline/eval_rtl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/eval_rtl.sh -------------------------------------------------------------------------------- /baseline/gelm/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import GelmLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /baseline/gelm/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/arguments.py -------------------------------------------------------------------------------- /baseline/gelm/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/constants.py -------------------------------------------------------------------------------- /baseline/gelm/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baseline/gelm/dataset/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/dataset/base_dataset.py -------------------------------------------------------------------------------- /baseline/gelm/dataset/hybrid_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/dataset/hybrid_dataset.py -------------------------------------------------------------------------------- /baseline/gelm/dataset/multihop_qa_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/dataset/multihop_qa_dataset.py -------------------------------------------------------------------------------- /baseline/gelm/dataset/temporal_reasoning_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/dataset/temporal_reasoning_dataset.py -------------------------------------------------------------------------------- /baseline/gelm/eval/eval_gpt_review_rtl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/eval/eval_gpt_review_rtl.py -------------------------------------------------------------------------------- /baseline/gelm/eval/inference_multihop_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/eval/inference_multihop_qa.py -------------------------------------------------------------------------------- /baseline/gelm/eval/inference_rtl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/eval/inference_rtl.py -------------------------------------------------------------------------------- /baseline/gelm/eval/summarize_gpt_review.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/eval/summarize_gpt_review.py -------------------------------------------------------------------------------- /baseline/gelm/eval/table/rule.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/eval/table/rule.txt -------------------------------------------------------------------------------- /baseline/gelm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/model/__init__.py -------------------------------------------------------------------------------- /baseline/gelm/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/model/builder.py -------------------------------------------------------------------------------- /baseline/gelm/model/gelm_arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/model/gelm_arch.py -------------------------------------------------------------------------------- /baseline/gelm/model/grounding_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/model/grounding_head.py -------------------------------------------------------------------------------- /baseline/gelm/model/language_model/gelm_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/model/language_model/gelm_llama.py -------------------------------------------------------------------------------- /baseline/gelm/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/train/train.py -------------------------------------------------------------------------------- /baseline/gelm/train/train_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/train/train_mem.py -------------------------------------------------------------------------------- /baseline/gelm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/gelm/utils.py -------------------------------------------------------------------------------- /baseline/llava/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import LlavaLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /baseline/llava/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/constants.py -------------------------------------------------------------------------------- /baseline/llava/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/conversation.py -------------------------------------------------------------------------------- /baseline/llava/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/mm_utils.py -------------------------------------------------------------------------------- /baseline/llava/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/__init__.py -------------------------------------------------------------------------------- /baseline/llava/model/apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/apply_delta.py -------------------------------------------------------------------------------- /baseline/llava/model/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/builder.py -------------------------------------------------------------------------------- /baseline/llava/model/consolidate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/consolidate.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/llava_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/llava_llama.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/llava_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/llava_mpt.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/adapt_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/adapt_tokenizer.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/attention.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/blocks.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/configuration_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/configuration_mpt.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/custom_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/custom_embedding.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/flash_attn_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/flash_attn_triton.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/hf_prefixlm_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/hf_prefixlm_converter.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/meta_init_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/meta_init_context.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/modeling_mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/modeling_mpt.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/norm.py -------------------------------------------------------------------------------- /baseline/llava/model/language_model/mpt/param_init_fns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/language_model/mpt/param_init_fns.py -------------------------------------------------------------------------------- /baseline/llava/model/llava_arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/llava_arch.py -------------------------------------------------------------------------------- /baseline/llava/model/make_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/make_delta.py -------------------------------------------------------------------------------- /baseline/llava/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/multimodal_encoder/builder.py -------------------------------------------------------------------------------- /baseline/llava/model/multimodal_encoder/clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/multimodal_encoder/clip_encoder.py -------------------------------------------------------------------------------- /baseline/llava/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/model/utils.py -------------------------------------------------------------------------------- /baseline/llava/serve/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baseline/llava/serve/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/serve/cli.py -------------------------------------------------------------------------------- /baseline/llava/serve/controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/serve/controller.py -------------------------------------------------------------------------------- /baseline/llava/serve/examples/extreme_ironing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/serve/examples/extreme_ironing.jpg -------------------------------------------------------------------------------- /baseline/llava/serve/examples/waterview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/serve/examples/waterview.jpg -------------------------------------------------------------------------------- /baseline/llava/serve/gradio_web_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/serve/gradio_web_server.py -------------------------------------------------------------------------------- /baseline/llava/serve/model_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/serve/model_worker.py -------------------------------------------------------------------------------- /baseline/llava/serve/register_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/serve/register_worker.py -------------------------------------------------------------------------------- /baseline/llava/serve/test_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/serve/test_message.py -------------------------------------------------------------------------------- /baseline/llava/train/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/train/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /baseline/llava/train/llava_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/train/llava_trainer.py -------------------------------------------------------------------------------- /baseline/llava/train/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/train/train.py -------------------------------------------------------------------------------- /baseline/llava/train/train_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/train/train_mem.py -------------------------------------------------------------------------------- /baseline/llava/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/llava/utils.py -------------------------------------------------------------------------------- /baseline/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/pyproject.toml -------------------------------------------------------------------------------- /baseline/scripts/finetune_mixed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/scripts/finetune_mixed.sh -------------------------------------------------------------------------------- /baseline/scripts/finetune_multihop_qa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/scripts/finetune_multihop_qa.sh -------------------------------------------------------------------------------- /baseline/scripts/finetune_rtl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/baseline/scripts/finetune_rtl.sh -------------------------------------------------------------------------------- /benchmark/metrics/answering_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/metrics/answering_metrics.py -------------------------------------------------------------------------------- /benchmark/metrics/evaluate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/metrics/evaluate.sh -------------------------------------------------------------------------------- /benchmark/metrics/evaluate_answering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/metrics/evaluate_answering.py -------------------------------------------------------------------------------- /benchmark/metrics/evaluate_grounding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/metrics/evaluate_grounding.py -------------------------------------------------------------------------------- /benchmark/zero-shot-inference/GPT-4o.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/zero-shot-inference/GPT-4o.py -------------------------------------------------------------------------------- /benchmark/zero-shot-inference/InternVL-Chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/zero-shot-inference/InternVL-Chat.py -------------------------------------------------------------------------------- /benchmark/zero-shot-inference/LLaVa-NeXT-Video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/zero-shot-inference/LLaVa-NeXT-Video.py -------------------------------------------------------------------------------- /benchmark/zero-shot-inference/TimeChat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/zero-shot-inference/TimeChat.py -------------------------------------------------------------------------------- /benchmark/zero-shot-inference/VTimeLLM.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/zero-shot-inference/VTimeLLM.py -------------------------------------------------------------------------------- /benchmark/zero-shot-inference/inference.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/zero-shot-inference/inference.sh -------------------------------------------------------------------------------- /benchmark/zero-shot-inference/pipeline/caption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/zero-shot-inference/pipeline/caption.py -------------------------------------------------------------------------------- /benchmark/zero-shot-inference/pipeline/reason.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qirui-chen/MultiHop-EgoQA/HEAD/benchmark/zero-shot-inference/pipeline/reason.py --------------------------------------------------------------------------------