├── .gitignore ├── LICENSE ├── README.md ├── inference.py ├── misc └── synthetic_caption.py ├── mochi ├── .python-version ├── README.md ├── assets │ └── mochi-factory.webp ├── contrib │ ├── README.md │ └── modal │ │ ├── lora.yaml │ │ ├── main.py │ │ └── readme.md ├── demos │ ├── api_example.py │ ├── cli.py │ ├── comfyui_nodes.py │ ├── fine_tuner │ │ ├── README.md │ │ ├── configs │ │ │ └── lora.yaml │ │ ├── dataset.py │ │ ├── embed_captions.py │ │ ├── encode_videos.py │ │ ├── preprocess.bash │ │ ├── run.bash │ │ ├── train.py │ │ └── trim_and_crop_videos.py │ ├── gradio_ui.py │ └── test_encoder_decoder.py ├── pyproject.toml ├── scripts │ ├── download_weights.py │ ├── format.bash │ ├── pytorch_to_safe_tensors.py │ ├── typecheck.bash │ └── weights_to_fp8.py ├── src │ └── genmo │ │ ├── lib │ │ ├── attn_imports.py │ │ ├── progress.py │ │ └── utils.py │ │ └── mochi_preview │ │ ├── __init__.py │ │ ├── dit │ │ └── joint_model │ │ │ ├── __init__.py │ │ │ ├── asymm_models_joint.py │ │ │ ├── audio_adapter.py │ │ │ ├── context_parallel.py │ │ │ ├── layers.py │ │ │ ├── lora.py │ │ │ ├── mod_rmsnorm.py │ │ │ ├── residual_tanh_gated_rmsnorm.py │ │ │ ├── rope_mixed.py │ │ │ ├── temporal_rope.py │ │ │ └── utils.py │ │ ├── pipelines.py │ │ └── vae │ │ ├── __init__.py │ │ ├── cp_conv.py │ │ ├── latent_dist.py │ │ ├── models.py │ │ └── vae_stats.py └── uv.lock ├── music_infuser ├── configs │ └── music_infuser.yaml ├── dataset.py ├── download_weights.py ├── embed_captions.py ├── encode_videos.py ├── preprocess.bash ├── run.bash ├── train.py └── trim_and_crop_videos.py ├── requirements.txt └── vlm_eval ├── .gitignore ├── LICENSE ├── README.md ├── assets ├── bird-twitter-car.wav ├── logo.png ├── pipeline.png └── sora.png ├── eval_alignment.py ├── eval_quality.py ├── pyproject.toml ├── scripts ├── custom │ ├── finetune.sh │ ├── finetune_audio.sh │ ├── finetune_lora.sh │ ├── finetune_qlora.sh │ ├── pretrain_audio.sh │ └── va_joint.sh ├── eval │ ├── eval_audio_TUT2017.sh │ ├── eval_audio_clothoAQA.sh │ ├── eval_audio_video_AVQA.sh │ ├── eval_audio_video_AVSD.sh │ ├── eval_audio_video_AVSSD.sh │ ├── eval_audio_vocalsound.sh │ ├── eval_video_cap_msvc.sh │ ├── eval_video_mcqa_egoschema.sh │ ├── eval_video_mcqa_mvbench.sh │ ├── eval_video_mcqa_perception_test_mcqa.sh │ ├── eval_video_mcqa_videomme.sh │ ├── eval_video_oqa_activitynet.sh │ ├── eval_video_oqa_msvd.sh │ ├── eval_video_oqa_vcgpt_1_correctness.sh │ ├── eval_video_oqa_vcgpt_2_detail.sh │ ├── eval_video_oqa_vcgpt_3_context.sh │ ├── eval_video_oqa_vcgpt_4_temporal.sh │ └── eval_video_oqa_vcgpt_5_consistency.sh └── vllava │ ├── finetune.sh │ └── pretrain.sh └── videollama2 ├── __init__.py ├── constants.py ├── conversation.py ├── eval ├── eval_audio_TUT2017.py ├── eval_audio_clotho.py ├── eval_audio_clothoAQA.py ├── eval_audio_video_AVQA.py ├── eval_audio_video_AVSD.py ├── eval_audio_video_AVSSD.py ├── eval_audio_vocalsound.py ├── eval_video_cap_msvc_correctness.py ├── eval_video_cap_msvc_detailedness.py ├── eval_video_mcqa_mvbench.py ├── eval_video_mcqa_videomme.py ├── eval_video_oqa_activitynet.py ├── eval_video_oqa_vcgpt_1_correctness.py ├── eval_video_oqa_vcgpt_2_detailed_orientation.py ├── eval_video_oqa_vcgpt_3_context.py ├── eval_video_oqa_vcgpt_4_temporal.py ├── eval_video_oqa_vcgpt_5_consistency.py ├── inference_audio.py ├── inference_audio_video.py ├── inference_video_cap_msvc.py ├── inference_video_mcqa_egoschema.py ├── inference_video_mcqa_mvbench.py ├── inference_video_mcqa_perception_test_mcqa.py ├── inference_video_mcqa_videomme.py ├── inference_video_oqa_activitynet.py ├── inference_video_oqa_vcgpt_consistency.py └── inference_video_oqa_vcgpt_general.py ├── mm_utils.py ├── model ├── __init__.py ├── beats │ ├── BEATs.py │ ├── LICENSE_beats │ ├── Tokenizers.py │ ├── __init__.py │ ├── backbone.py │ ├── modules.py │ ├── quantizer.py │ └── weight_norm_fix.py ├── encoder.py ├── mel_filters.npz ├── projector.py ├── videollama2_arch.py ├── videollama2_gemma2.py ├── videollama2_llama.py ├── videollama2_mistral.py ├── videollama2_mixtral.py ├── videollama2_phi3.py └── videollama2_qwen2.py ├── serve ├── cli.py ├── controller.py ├── examples │ ├── bird-twitter-car.wav │ ├── desert.jpg │ ├── door.of.bar.raining2.wav │ ├── extreme_ironing.jpg │ └── waterview.jpg ├── gradio_web_server.py ├── gradio_web_server_adhoc.py ├── gradio_web_server_adhoc_av.py ├── model_worker.py ├── register_worker.py ├── sglang_worker.py └── test_message.py ├── train.py ├── utils.py └── videollama2_trainer.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/README.md -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/inference.py -------------------------------------------------------------------------------- /misc/synthetic_caption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/misc/synthetic_caption.py -------------------------------------------------------------------------------- /mochi/.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /mochi/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/README.md -------------------------------------------------------------------------------- /mochi/assets/mochi-factory.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/assets/mochi-factory.webp -------------------------------------------------------------------------------- /mochi/contrib/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/contrib/README.md -------------------------------------------------------------------------------- /mochi/contrib/modal/lora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/contrib/modal/lora.yaml -------------------------------------------------------------------------------- /mochi/contrib/modal/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/contrib/modal/main.py -------------------------------------------------------------------------------- /mochi/contrib/modal/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/contrib/modal/readme.md -------------------------------------------------------------------------------- /mochi/demos/api_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/api_example.py -------------------------------------------------------------------------------- /mochi/demos/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/cli.py -------------------------------------------------------------------------------- /mochi/demos/comfyui_nodes.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/README.md -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/configs/lora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/configs/lora.yaml -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/dataset.py -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/embed_captions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/embed_captions.py -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/encode_videos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/encode_videos.py -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/preprocess.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/preprocess.bash -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/run.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/run.bash -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/train.py -------------------------------------------------------------------------------- /mochi/demos/fine_tuner/trim_and_crop_videos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/fine_tuner/trim_and_crop_videos.py -------------------------------------------------------------------------------- /mochi/demos/gradio_ui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/gradio_ui.py -------------------------------------------------------------------------------- /mochi/demos/test_encoder_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/demos/test_encoder_decoder.py -------------------------------------------------------------------------------- /mochi/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/pyproject.toml -------------------------------------------------------------------------------- /mochi/scripts/download_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/scripts/download_weights.py -------------------------------------------------------------------------------- /mochi/scripts/format.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/scripts/format.bash -------------------------------------------------------------------------------- /mochi/scripts/pytorch_to_safe_tensors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/scripts/pytorch_to_safe_tensors.py -------------------------------------------------------------------------------- /mochi/scripts/typecheck.bash: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | npx pyright -------------------------------------------------------------------------------- /mochi/scripts/weights_to_fp8.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mochi/src/genmo/lib/attn_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/lib/attn_imports.py -------------------------------------------------------------------------------- /mochi/src/genmo/lib/progress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/lib/progress.py -------------------------------------------------------------------------------- /mochi/src/genmo/lib/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/lib/utils.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/asymm_models_joint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/asymm_models_joint.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/audio_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/audio_adapter.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/context_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/context_parallel.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/layers.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/lora.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/mod_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/mod_rmsnorm.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/residual_tanh_gated_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/residual_tanh_gated_rmsnorm.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/rope_mixed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/rope_mixed.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/temporal_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/temporal_rope.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/dit/joint_model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/dit/joint_model/utils.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/pipelines.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/vae/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/vae/cp_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/vae/cp_conv.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/vae/latent_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/vae/latent_dist.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/vae/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/vae/models.py -------------------------------------------------------------------------------- /mochi/src/genmo/mochi_preview/vae/vae_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/src/genmo/mochi_preview/vae/vae_stats.py -------------------------------------------------------------------------------- /mochi/uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/mochi/uv.lock -------------------------------------------------------------------------------- /music_infuser/configs/music_infuser.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/configs/music_infuser.yaml -------------------------------------------------------------------------------- /music_infuser/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/dataset.py -------------------------------------------------------------------------------- /music_infuser/download_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/download_weights.py -------------------------------------------------------------------------------- /music_infuser/embed_captions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/embed_captions.py -------------------------------------------------------------------------------- /music_infuser/encode_videos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/encode_videos.py -------------------------------------------------------------------------------- /music_infuser/preprocess.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/preprocess.bash -------------------------------------------------------------------------------- /music_infuser/run.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/run.bash -------------------------------------------------------------------------------- /music_infuser/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/train.py -------------------------------------------------------------------------------- /music_infuser/trim_and_crop_videos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/music_infuser/trim_and_crop_videos.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/requirements.txt -------------------------------------------------------------------------------- /vlm_eval/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/.gitignore -------------------------------------------------------------------------------- /vlm_eval/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/LICENSE -------------------------------------------------------------------------------- /vlm_eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/README.md -------------------------------------------------------------------------------- /vlm_eval/assets/bird-twitter-car.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/assets/bird-twitter-car.wav -------------------------------------------------------------------------------- /vlm_eval/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/assets/logo.png -------------------------------------------------------------------------------- /vlm_eval/assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/assets/pipeline.png -------------------------------------------------------------------------------- /vlm_eval/assets/sora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/assets/sora.png -------------------------------------------------------------------------------- /vlm_eval/eval_alignment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/eval_alignment.py -------------------------------------------------------------------------------- /vlm_eval/eval_quality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/eval_quality.py -------------------------------------------------------------------------------- /vlm_eval/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/pyproject.toml -------------------------------------------------------------------------------- /vlm_eval/scripts/custom/finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/custom/finetune.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/custom/finetune_audio.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/custom/finetune_audio.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/custom/finetune_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/custom/finetune_lora.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/custom/finetune_qlora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/custom/finetune_qlora.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/custom/pretrain_audio.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/custom/pretrain_audio.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/custom/va_joint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/custom/va_joint.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_audio_TUT2017.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_audio_TUT2017.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_audio_clothoAQA.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_audio_clothoAQA.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_audio_video_AVQA.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_audio_video_AVQA.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_audio_video_AVSD.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_audio_video_AVSD.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_audio_video_AVSSD.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_audio_video_AVSSD.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_audio_vocalsound.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_audio_vocalsound.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_cap_msvc.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_cap_msvc.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_mcqa_egoschema.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_mcqa_egoschema.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_mcqa_mvbench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_mcqa_mvbench.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_mcqa_perception_test_mcqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_mcqa_perception_test_mcqa.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_mcqa_videomme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_mcqa_videomme.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_oqa_activitynet.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_oqa_activitynet.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_oqa_msvd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_oqa_msvd.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_oqa_vcgpt_1_correctness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_oqa_vcgpt_1_correctness.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_oqa_vcgpt_2_detail.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_oqa_vcgpt_2_detail.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_oqa_vcgpt_3_context.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_oqa_vcgpt_3_context.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_oqa_vcgpt_4_temporal.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_oqa_vcgpt_4_temporal.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/eval/eval_video_oqa_vcgpt_5_consistency.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/eval/eval_video_oqa_vcgpt_5_consistency.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/vllava/finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/vllava/finetune.sh -------------------------------------------------------------------------------- /vlm_eval/scripts/vllava/pretrain.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/scripts/vllava/pretrain.sh -------------------------------------------------------------------------------- /vlm_eval/videollama2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/__init__.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/constants.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/conversation.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_audio_TUT2017.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_audio_TUT2017.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_audio_clotho.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_audio_clotho.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_audio_clothoAQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_audio_clothoAQA.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_audio_video_AVQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_audio_video_AVQA.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_audio_video_AVSD.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_audio_video_AVSD.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_audio_video_AVSSD.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_audio_video_AVSSD.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_audio_vocalsound.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_audio_vocalsound.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_cap_msvc_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_cap_msvc_correctness.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_cap_msvc_detailedness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_cap_msvc_detailedness.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_mcqa_mvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_mcqa_mvbench.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_mcqa_videomme.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_mcqa_videomme.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_oqa_activitynet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_oqa_activitynet.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_1_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_1_correctness.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_2_detailed_orientation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_2_detailed_orientation.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_3_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_3_context.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_4_temporal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_4_temporal.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_5_consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/eval_video_oqa_vcgpt_5_consistency.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_audio.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_audio_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_audio_video.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_video_cap_msvc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_video_cap_msvc.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_video_mcqa_egoschema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_video_mcqa_egoschema.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_video_mcqa_mvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_video_mcqa_mvbench.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_video_mcqa_perception_test_mcqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_video_mcqa_perception_test_mcqa.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_video_mcqa_videomme.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_video_mcqa_videomme.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_video_oqa_activitynet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_video_oqa_activitynet.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_video_oqa_vcgpt_consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_video_oqa_vcgpt_consistency.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/eval/inference_video_oqa_vcgpt_general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/eval/inference_video_oqa_vcgpt_general.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/mm_utils.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/__init__.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/beats/BEATs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/beats/BEATs.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/beats/LICENSE_beats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/beats/LICENSE_beats -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/beats/Tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/beats/Tokenizers.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/beats/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/beats/backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/beats/backbone.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/beats/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/beats/modules.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/beats/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/beats/quantizer.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/beats/weight_norm_fix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/beats/weight_norm_fix.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/encoder.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/mel_filters.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/mel_filters.npz -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/projector.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/videollama2_arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/videollama2_arch.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/videollama2_gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/videollama2_gemma2.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/videollama2_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/videollama2_llama.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/videollama2_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/videollama2_mistral.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/videollama2_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/videollama2_mixtral.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/videollama2_phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/videollama2_phi3.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/model/videollama2_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/model/videollama2_qwen2.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/cli.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/controller.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/examples/bird-twitter-car.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/examples/bird-twitter-car.wav -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/examples/desert.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/examples/desert.jpg -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/examples/door.of.bar.raining2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/examples/door.of.bar.raining2.wav -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/examples/extreme_ironing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/examples/extreme_ironing.jpg -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/examples/waterview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/examples/waterview.jpg -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/gradio_web_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/gradio_web_server.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/gradio_web_server_adhoc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/gradio_web_server_adhoc.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/gradio_web_server_adhoc_av.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/gradio_web_server_adhoc_av.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/model_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/model_worker.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/register_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/register_worker.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/sglang_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/sglang_worker.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/serve/test_message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/serve/test_message.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/train.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/utils.py -------------------------------------------------------------------------------- /vlm_eval/videollama2/videollama2_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SusungHong/MusicInfuser/HEAD/vlm_eval/videollama2/videollama2_trainer.py --------------------------------------------------------------------------------