├── supervised_finetuning
│   ├── qwen2_5-vl
│   │   ├── src
│   │   │   ├── __init__.py
│   │   │   ├── serve
│   │   │   │   ├── __init__.py
│   │   │   │   └── app.py
│   │   │   ├── train
│   │   │   │   ├── __init__.py
│   │   │   │   ├── constants.py
│   │   │   │   ├── train_utils.py
│   │   │   │   └── params.py
│   │   │   ├── merge_lora_weights.py
│   │   │   └── utils.py
│   │   ├── scripts
│   │   │   ├── merge_lora.sh
│   │   │   ├── zero2.json
│   │   │   ├── zero2_offload.json
│   │   │   ├── zero3.json
│   │   │   ├── zero3_offload.json
│   │   │   └── finetune_lora.sh
│   │   └── environment.yaml
│   ├── phi-3-vision
│   │   ├── src
│   │   │   ├── __init__.py
│   │   │   ├── serve
│   │   │   │   ├── __init__.py
│   │   │   │   └── cli.py
│   │   │   ├── training
│   │   │   │   ├── __init__.py
│   │   │   │   ├── params.py
│   │   │   │   └── train_utils.py
│   │   │   ├── model
│   │   │   │   └── Phi3_vision
│   │   │   │       ├── preprocessor_config.json
│   │   │   │       ├── special_tokens_map.json
│   │   │   │       └── config.json
│   │   │   ├── merge_lora_weights.py
│   │   │   └── utils.py
│   │   ├── scripts
│   │   │   ├── zero2.json
│   │   │   ├── zero2_fp8.json
│   │   │   ├── zero3.json
│   │   │   ├── finetune.sh
│   │   │   └── zero3_offload.json
│   │   └── environment.yaml
│   ├── llava_next-8b
│   │   ├── finetune_lora.sh
│   │   ├── data
│   │   │   └── dataset_info.json
│   │   ├── llama-llava-next-8b_lora_merge.yaml
│   │   ├── llama-llava-next-8b.yaml
│   │   └── environment.yaml
│   ├── minicpm_v2_6
│   │   ├── data
│   │   │   └── dataset_info.json
│   │   ├── finetune_lora.sh
│   │   ├── minicpm-v-v2_6-lora_merge.yaml
│   │   └── minicpm-v-v2_6.yaml
│   └── enironment
│       └── Dockerfile
├── overview.png
├── dataset_comparsion.png
├── data_generation_pipeline
│   ├── data_pipeline.png
│   └── README.md
├── public_benchmarks
│   ├── CharXiv
│   │   └── README.md
│   ├── ChartBench
│   │   └── README.md
│   ├── ChartX
│   │   └── README.md
│   ├── ReachQA
│   │   └── README.md
│   ├── ChartQA
│   │   └── README.md
│   └── ECDBench
│       └── README.md
├── datasets
│   ├── README.md
│   └── convert_to_format.py
├── LICENSE
└── evaluation
    ├── ChartQA
    │   ├── inference_on_chartqa_minicpm_v2_6.py
    │   ├── bash_evaluation.sh
    │   ├── inference_on_chartqa_phi3v.py
    │   ├── inference_on_chartqa_llava_next.py
    │   ├── inference_on_chartqa_qwen2_5_vl.py
    │   └── evaluate_on_chartqa.py
    ├── ChartX
    │   ├── inference_on_chartx_minicpm_v2_6.py
    │   ├── bash_evaluation.sh
    │   ├── inference_on_chartx_phi3v.py
    │   ├── inference_on_chartx_llava_next.py
    │   ├── inference_on_chartx_qwen2_5_vl.py
    │   └── evaluate_on_chartx.py
    ├── CharXiv
    │   ├── eval_utils
    │   │   ├── evaluate.py
    │   │   ├── score_utils.py
    │   │   └── reasoning_utils.py
    │   ├── inference_on_charxiv_minicpm_v2_6.py
    │   ├── inference_on_charxiv_llava-next.py
    │   ├── inference_on_charxiv_phi3v.py
    │   ├── inference_on_charxiv_qwen2_5_vl.py
    │   └── bash_evaluation.sh
    ├── ECDBench
    │   ├── inference_on_ecdbench_minicpm_v2_6.py
    │   ├── bash_evaluation.sh
    │   ├── inference_on_ecdbench_phi3v.py
    │   ├── inference_on_ecdbench_llava_next.py
    │   └── inference_on_ecdbench_qwen2_5_vl.py
    ├── ReachQA
    │   ├── inference_on_reachqa_minicpm_v2_6.py
    │   ├── bash_evaluation.sh
    │   ├── inference_on_reachqa_phi3v.py
    │   ├── inference_on_reachqa_llava_next.py
    │   └── inference_on_reachqa_qwen2_5_vl.py
    └── ChartBench
        ├── inference_on_chartbench_minicpm_v2_6.py
        ├── bash_evaluation.sh
        ├── inference_on_chartbench_phi3v.py
        ├── inference_on_chartbench_llava_next.py
        └── inference_on_chartbench_qwen2_5_vl.py
/supervised_finetuning/qwen2_5-vl/src/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/supervised_finetuning/phi-3-vision/src/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/supervised_finetuning/phi-3-vision/src/serve/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/supervised_finetuning/qwen2_5-vl/src/serve/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/supervised_finetuning/qwen2_5-vl/src/train/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/supervised_finetuning/phi-3-vision/src/training/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuweiyang-anu/ECD/HEAD/overview.png
--------------------------------------------------------------------------------
/dataset_comparsion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuweiyang-anu/ECD/HEAD/dataset_comparsion.png
--------------------------------------------------------------------------------
/data_generation_pipeline/data_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yuweiyang-anu/ECD/HEAD/data_generation_pipeline/data_pipeline.png
--------------------------------------------------------------------------------
/public_benchmarks/CharXiv/README.md:
--------------------------------------------------------------------------------
1 | ## CharXiv
2 | You need to download the CharXiv test dataset from 'https://huggingface.co/datasets/princeton-nlp/CharXiv' and put the images into the 'images' folder.
--------------------------------------------------------------------------------
/public_benchmarks/ChartBench/README.md:
--------------------------------------------------------------------------------
1 | ## ChartBench
2 | You need to download the ChartBench test dataset from 'https://huggingface.co/datasets/SincereX/ChartBench' and organize it in the manner of 'test_data.json'.
--------------------------------------------------------------------------------
/public_benchmarks/ChartX/README.md:
--------------------------------------------------------------------------------
1 | ## ChartX
2 | You need to download the ChartX test dataset from 'https://huggingface.co/datasets/U4R/ChartX' and organize it in the manner of 'ChartX_annotation_test.json'.
3 |
--------------------------------------------------------------------------------
/public_benchmarks/ReachQA/README.md:
--------------------------------------------------------------------------------
1 | ## ReachQA
2 | You need to download the ReachQA test dataset from 'https://huggingface.co/datasets/hewei2001/ReachQA' and organize it in the manner of 'test_data/test_data.json'.
--------------------------------------------------------------------------------
/public_benchmarks/ChartQA/README.md:
--------------------------------------------------------------------------------
1 | ## ChartQA
2 | You need to download the ChartQA test dataset from 'https://huggingface.co/datasets/ahmed-masry/ChartQA' and organize it in the manner of 'test/test_data.json'; the images should be put in the 'test/png/' folder.
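3 |
4 | For example, a minimal download sketch (assuming the `huggingface_hub` package is installed; `local_dir` is an assumed target path, and you may still need to re-organize the downloaded files into the layout above):
5 |
6 | ```python
7 | from huggingface_hub import snapshot_download
8 |
9 | # Fetch the ChartQA dataset snapshot from the Hugging Face Hub
10 | # into a local folder (path is illustrative only).
11 | snapshot_download(
12 |     repo_id="ahmed-masry/ChartQA",
13 |     repo_type="dataset",
14 |     local_dir="public_benchmarks/ChartQA",
15 | )
16 | ```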
--------------------------------------------------------------------------------
/public_benchmarks/ECDBench/README.md:
--------------------------------------------------------------------------------
1 | ## ECDBench
2 | You need to download the ECD test dataset (ECDBench) from 'https://huggingface.co/datasets/ChartFoundation/ECDBench' and put the "rendered_images" folder under this directory; the QA data is organized in 'ECD_Bench_All.json'.
--------------------------------------------------------------------------------
/supervised_finetuning/llava_next-8b/finetune_lora.sh:
--------------------------------------------------------------------------------
1 | export WANDB_PROJECT="VLM-SFT-on-ECD"
2 | export WANDB_API_KEY="your_wandb_api_key"  # replace with your own Weights & Biases API key
3 |
4 | llamafactory-cli train llama-llava-next-8b.yaml
5 | llamafactory-cli export llama-llava-next-8b_lora_merge.yaml
--------------------------------------------------------------------------------
/supervised_finetuning/llava_next-8b/data/dataset_info.json:
--------------------------------------------------------------------------------
1 | {
2 |   "ECD": {
3 |     "file_name": "ECD/datasets/ECD_qa_data_all_formatted_for_llamafactory.json",
4 |     "formatting": "sharegpt",
5 |     "columns": {
6 |       "messages": "conversations",
7 |       "images": "images"
8 |     }
9 |   }
10 | }
--------------------------------------------------------------------------------
/supervised_finetuning/minicpm_v2_6/data/dataset_info.json:
--------------------------------------------------------------------------------
1 | {
2 |   "ECD": {
3 |     "file_name": "ECD/datasets/ECD_qa_data_all_formatted_for_llamafactory.json",
4 |     "formatting": "sharegpt",
5 |     "columns": {
6 |       "messages": "conversations",
7 |       "images": "images"
8 |     }
9 |   }
10 | }
--------------------------------------------------------------------------------
/supervised_finetuning/qwen2_5-vl/scripts/merge_lora.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
4 | export PYTHONPATH=src:$PYTHONPATH
5 |
6 | python src/merge_lora_weights.py \
7 |     --model-path ./output/lora_qwen2_5_vl_on_ECD \
8 |     --model-base $MODEL_NAME \
9 |     --save-model-path ./output/lora_qwen2_5_vl_on_ECD-merged \
10 |     --safe-serialization
--------------------------------------------------------------------------------
/supervised_finetuning/minicpm_v2_6/finetune_lora.sh:
--------------------------------------------------------------------------------
1 | # huggingface-cli login
2 | export CUDA_VISIBLE_DEVICES=0
3 | export WANDB_PROJECT="VLM-SFT-on-ECD"
4 | export WANDB_API_KEY="your_wandb_api_key"  # replace with your own Weights & Biases API key
5 | export HUGGINGFACE_HUB_TOKEN="your_huggingface_api_key"  # replace with your own Hugging Face access token
6 |
7 | llamafactory-cli train minicpm-v-v2_6.yaml
8 | llamafactory-cli export minicpm-v-v2_6-lora_merge.yaml
--------------------------------------------------------------------------------
/supervised_finetuning/minicpm_v2_6/minicpm-v-v2_6-lora_merge.yaml:
--------------------------------------------------------------------------------
1 | ### model
2 | model_name_or_path: openbmb/MiniCPM-V-2_6
3 | adapter_name_or_path: output/lora_minicpm_v2_6_on_ECD
4 | template: minicpm_v
5 | finetuning_type: lora
6 | trust_remote_code: true
7 |
8 | ### export
9 | export_dir: output/lora_minicpm_v2_6_on_ECD-merged
10 | export_size: 2
11 | export_device: cpu
12 | export_legacy_format: false
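13 |
14 | # Explanatory comments (added; not part of the original config):
15 | # 'export_size: 2' caps each exported checkpoint shard at roughly 2 GB;
16 | # 'export_device: cpu' performs the LoRA merge on CPU to avoid GPU
17 | # out-of-memory; 'export_legacy_format: false' saves safetensors files
18 | # instead of the legacy PyTorch .bin format.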
--------------------------------------------------------------------------------
/supervised_finetuning/llava_next-8b/llama-llava-next-8b_lora_merge.yaml:
--------------------------------------------------------------------------------
1 | ### model
2 | model_name_or_path: llava-hf/llama3-llava-next-8b-hf
3 | adapter_name_or_path: output/lora_llava-next-8b_on_ECD
4 | template: llava_next
5 | finetuning_type: lora
6 | trust_remote_code: true
7 |
8 | ### export
9 | export_dir: output/lora_llava-next-8b_on_ECD-merged
10 | export_size: 2
11 | export_device: cpu
12 | export_legacy_format: false
--------------------------------------------------------------------------------
/supervised_finetuning/qwen2_5-vl/src/train/constants.py:
--------------------------------------------------------------------------------
1 | IGNORE_INDEX = -100
2 |
3 | DEFAULT_IM_START_TOKEN = "<|im_start|>"
4 | DEFAULT_IM_END_TOKEN = "<|im_end|>"
5 | DEFAULT_IMAGE_TOKEN = "<|image_pad|>"
6 | DEFAULT_VIDEO_TOKEN = "<|video_pad|>"
7 | LLAVA_IMAGE_TOKEN = "<image>"
8 | LLAVA_VIDEO_TOKEN = "<video>"
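9 |
10 | # Explanatory comments (added; not part of the original file):
11 | # IGNORE_INDEX matches the default ignore_index of torch.nn.CrossEntropyLoss,
12 | # so label positions set to -100 are excluded from the loss computation.
13 | # <|image_pad|> / <|video_pad|> are Qwen2.5-VL's native multimodal
14 | # placeholder tokens, while <image> / <video> follow the LLaVA-style
15 | # placeholder convention (assumed here to appear in the incoming
16 | # conversation data before it is converted to the Qwen format).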