├── README.md ├── images ├── model.png └── performance.png ├── setup.sh ├── test ├── batch_infer.py ├── json_files │ ├── CGVDS_total_score.json │ ├── Konvid-1k_total_ds_score.json │ ├── LIVE-VQC_total_ds_score.json │ ├── LIVE-YT-Gaming_total_score.json │ ├── LSVQ_whole_test_1080p_ds_score.json │ ├── LSVQ_whole_test_ds_score.json │ ├── VDPVE_train_score.json │ ├── Waterloo_IVC_4K_total_score.json │ ├── live_hfr_total_score.json │ └── youtube_ugc_total.json ├── modeling_internvl_chat.py └── single_infer.py └── train ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── README.md ├── assets ├── add_new_model.md ├── data2.png ├── iou.jpg ├── math-leaderboard.jpg ├── module.png ├── performance3.png ├── performance4.png └── wandb.jpg ├── modeling_internvl_chat.py ├── run_scripts └── run_grpo_vqa_internvl.sh ├── setup.sh └── src └── open-r1-multimodal ├── .gitignore ├── LICENSE ├── Makefile ├── configs ├── ddp.yaml ├── qwen2vl_sft_config.yaml ├── zero2.yaml └── zero3.yaml ├── data_config ├── rec.yaml └── rec_internvl.yaml ├── data_jsonl └── LSVQ_labels_train.jsonl ├── local_scripts ├── create_vision_cot_data.py ├── lmms_eval_qwen2vl.sh ├── prepare_hf_data.py ├── train_aria_moe.sh ├── train_qwen2_vl.sh ├── zero2.json ├── zero3.json ├── zero3.yaml ├── zero3_offload.json └── zero_stage2_config.json ├── setup.cfg ├── setup.py └── src └── open_r1 ├── __init__.py ├── configs.py ├── evaluate.py ├── generate.py ├── grpo.py ├── grpo_jsonl.py ├── grpo_rec.py ├── qwen2_5vl_monkey_patch.py ├── sft.py ├── trainer ├── __init__.py ├── grpo_config.py ├── grpo_trainer.py ├── internvl_chat │ ├── __init__.py │ ├── configuration_intern_vit.py │ ├── configuration_internvl_chat.py │ ├── conversation.py │ ├── modeling_intern_vit.py │ └── modeling_internvl_chat.py └── vllm_grpo_trainer.py ├── utils ├── __init__.py ├── callbacks.py ├── evaluation.py ├── hub.py ├── math.py └── pycocotools │ ├── coco.py │ └── cocoeval.py └── vlm_modules ├── __init__.py ├── internvl_module.py ├── qwen_module.py └── vlm_module.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/README.md -------------------------------------------------------------------------------- /images/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/images/model.png -------------------------------------------------------------------------------- /images/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/images/performance.png -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/setup.sh -------------------------------------------------------------------------------- /test/batch_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/batch_infer.py -------------------------------------------------------------------------------- /test/json_files/CGVDS_total_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/CGVDS_total_score.json -------------------------------------------------------------------------------- /test/json_files/Konvid-1k_total_ds_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/Konvid-1k_total_ds_score.json -------------------------------------------------------------------------------- /test/json_files/LIVE-VQC_total_ds_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/LIVE-VQC_total_ds_score.json -------------------------------------------------------------------------------- /test/json_files/LIVE-YT-Gaming_total_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/LIVE-YT-Gaming_total_score.json -------------------------------------------------------------------------------- /test/json_files/LSVQ_whole_test_1080p_ds_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/LSVQ_whole_test_1080p_ds_score.json -------------------------------------------------------------------------------- /test/json_files/LSVQ_whole_test_ds_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/LSVQ_whole_test_ds_score.json -------------------------------------------------------------------------------- /test/json_files/VDPVE_train_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/VDPVE_train_score.json -------------------------------------------------------------------------------- /test/json_files/Waterloo_IVC_4K_total_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/Waterloo_IVC_4K_total_score.json -------------------------------------------------------------------------------- /test/json_files/live_hfr_total_score.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/live_hfr_total_score.json -------------------------------------------------------------------------------- /test/json_files/youtube_ugc_total.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/json_files/youtube_ugc_total.json -------------------------------------------------------------------------------- /test/modeling_internvl_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/modeling_internvl_chat.py -------------------------------------------------------------------------------- /test/single_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/test/single_infer.py -------------------------------------------------------------------------------- /train/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /train/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/Dockerfile -------------------------------------------------------------------------------- /train/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/LICENSE -------------------------------------------------------------------------------- /train/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/README.md -------------------------------------------------------------------------------- /train/assets/add_new_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/assets/add_new_model.md -------------------------------------------------------------------------------- /train/assets/data2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/assets/data2.png -------------------------------------------------------------------------------- /train/assets/iou.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/assets/iou.jpg -------------------------------------------------------------------------------- /train/assets/math-leaderboard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/assets/math-leaderboard.jpg -------------------------------------------------------------------------------- /train/assets/module.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/assets/module.png -------------------------------------------------------------------------------- /train/assets/performance3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/assets/performance3.png -------------------------------------------------------------------------------- /train/assets/performance4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/assets/performance4.png -------------------------------------------------------------------------------- /train/assets/wandb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/assets/wandb.jpg -------------------------------------------------------------------------------- /train/modeling_internvl_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/modeling_internvl_chat.py -------------------------------------------------------------------------------- /train/run_scripts/run_grpo_vqa_internvl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/run_scripts/run_grpo_vqa_internvl.sh -------------------------------------------------------------------------------- /train/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/setup.sh -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/.gitignore -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/LICENSE -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/Makefile -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/configs/ddp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/configs/ddp.yaml -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/configs/qwen2vl_sft_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/configs/qwen2vl_sft_config.yaml -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/configs/zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/configs/zero2.yaml -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/configs/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/configs/zero3.yaml -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/data_config/rec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/data_config/rec.yaml -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/data_config/rec_internvl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/data_config/rec_internvl.yaml -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/data_jsonl/LSVQ_labels_train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/data_jsonl/LSVQ_labels_train.jsonl -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/create_vision_cot_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/create_vision_cot_data.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/lmms_eval_qwen2vl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/lmms_eval_qwen2vl.sh -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/prepare_hf_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/prepare_hf_data.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/train_aria_moe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/train_aria_moe.sh -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/train_qwen2_vl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/train_qwen2_vl.sh -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/zero2.json -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/zero3.json -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/zero3.yaml -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/zero3_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/zero3_offload.json -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/local_scripts/zero_stage2_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/local_scripts/zero_stage2_config.json -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/setup.cfg -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/setup.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/configs.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/evaluate.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/generate.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/grpo.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/grpo_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/grpo_jsonl.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/grpo_rec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/grpo_rec.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/qwen2_5vl_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/qwen2_5vl_monkey_patch.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/sft.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/__init__.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/grpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/grpo_config.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/grpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/grpo_trainer.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/__init__.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/configuration_intern_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/configuration_intern_vit.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/configuration_internvl_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/configuration_internvl_chat.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/conversation.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/modeling_intern_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/modeling_intern_vit.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/modeling_internvl_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/internvl_chat/modeling_internvl_chat.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/trainer/vllm_grpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/trainer/vllm_grpo_trainer.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/utils/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/utils/callbacks.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/utils/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/utils/evaluation.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/utils/hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/utils/hub.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/utils/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/utils/math.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/utils/pycocotools/coco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/utils/pycocotools/coco.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/utils/pycocotools/cocoeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/utils/pycocotools/cocoeval.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/vlm_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/vlm_modules/__init__.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/vlm_modules/internvl_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/vlm_modules/internvl_module.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/vlm_modules/qwen_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/vlm_modules/qwen_module.py -------------------------------------------------------------------------------- /train/src/open-r1-multimodal/src/open_r1/vlm_modules/vlm_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clh124/VQAThinker/HEAD/train/src/open-r1-multimodal/src/open_r1/vlm_modules/vlm_module.py --------------------------------------------------------------------------------