├── .gitignore
├── DocOwl
├── DocLLM
│ ├── LLMDoc.jsonl
│ ├── export_docowl.jsonl
│ ├── export_minigpt4.jsonl
│ ├── export_mplug_owl.jsonl
│ └── images
│ │ ├── chart_0.png
│ │ ├── chart_13.png
│ │ ├── chart_15.png
│ │ ├── chart_16.png
│ │ ├── chart_20.png
│ │ ├── chart_26.png
│ │ ├── chart_31.png
│ │ ├── chart_32.png
│ │ ├── chart_34.png
│ │ ├── chart_35.png
│ │ ├── chart_37.png
│ │ ├── chart_41.png
│ │ ├── chart_44.png
│ │ ├── chart_46.png
│ │ ├── chart_49.png
│ │ ├── chart_57.png
│ │ ├── chart_69.png
│ │ ├── chart_76.png
│ │ ├── chart_80.png
│ │ ├── document_0.png
│ │ ├── document_1.png
│ │ ├── document_10.png
│ │ ├── document_19.png
│ │ ├── document_2.png
│ │ ├── document_22.png
│ │ ├── document_27.png
│ │ ├── document_35.png
│ │ ├── document_40.png
│ │ ├── document_43.png
│ │ ├── document_44.png
│ │ ├── document_47.png
│ │ ├── document_52.png
│ │ ├── document_57.png
│ │ ├── document_67.png
│ │ ├── document_7.png
│ │ ├── document_8.png
│ │ ├── document_86.png
│ │ ├── document_91.png
│ │ ├── document_97.png
│ │ ├── natural_1.png
│ │ ├── natural_10.png
│ │ ├── natural_14.png
│ │ ├── natural_15.png
│ │ ├── natural_21.png
│ │ ├── natural_32.png
│ │ ├── natural_34.png
│ │ ├── natural_38.png
│ │ ├── natural_42.png
│ │ ├── natural_44.png
│ │ ├── natural_5.png
│ │ ├── natural_56.png
│ │ ├── natural_69.png
│ │ ├── natural_75.png
│ │ ├── natural_77.png
│ │ ├── natural_79.png
│ │ ├── natural_8.png
│ │ ├── natural_81.png
│ │ ├── natural_90.png
│ │ ├── natural_95.png
│ │ ├── screenshot_1.png
│ │ ├── screenshot_11.png
│ │ ├── screenshot_25.png
│ │ ├── screenshot_27.png
│ │ ├── screenshot_28.png
│ │ ├── screenshot_30.png
│ │ ├── screenshot_31.png
│ │ ├── screenshot_34.png
│ │ ├── screenshot_4.png
│ │ ├── screenshot_44.png
│ │ ├── screenshot_47.png
│ │ ├── screenshot_72.png
│ │ ├── screenshot_77.png
│ │ ├── screenshot_8.png
│ │ ├── screenshot_83.png
│ │ ├── screenshot_87.png
│ │ ├── screenshot_91.png
│ │ ├── screenshot_96.png
│ │ ├── table_0.png
│ │ ├── table_13.png
│ │ ├── table_17.png
│ │ ├── table_29.png
│ │ ├── table_39.png
│ │ ├── table_42.png
│ │ ├── table_50.png
│ │ ├── table_54.png
│ │ ├── table_56.png
│ │ ├── table_57.png
│ │ ├── table_60.png
│ │ ├── table_7.png
│ │ ├── table_73.png
│ │ ├── table_75.png
│ │ ├── table_76.png
│ │ ├── table_79.png
│ │ ├── table_87.png
│ │ ├── table_89.png
│ │ ├── table_97.png
│ │ └── table_98.png
├── README.md
└── assets
│ ├── -twitter-blue.svg
│ ├── Demo-ModelScope-brightgreen.svg
│ ├── LICENSE-Apache License-blue.svg
│ ├── Paper-Arxiv-orange.svg
│ ├── Paper-PDF-orange.svg
│ ├── cases_git.jpg
│ ├── mPLUG_new1.png
│ ├── modelscopeIcon.svg
│ └── overview.jpg
├── DocOwl1.5
├── README.md
├── app.py
├── assets
│ ├── Paper-Arxiv-orange.svg
│ ├── doc_instruct.png
│ ├── modelscope.png
│ └── radar.png
├── docowl_benchmark_evaluate.py
├── docowl_doclocal4k_evaluate.py
├── docowl_infer.py
├── evaluation
│ ├── benchmarks_eval.py
│ ├── due_benchmarks_eval.py
│ ├── due_evaluator
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── __version__.py
│ │ ├── due_evaluator.py
│ │ ├── py.typed
│ │ ├── scorers
│ │ │ ├── __init__.py
│ │ │ ├── accuracy_scorer.py
│ │ │ ├── anls_scorer.py
│ │ │ ├── base_scorer.py
│ │ │ ├── fscorer.py
│ │ │ ├── geval_scorer.py
│ │ │ ├── group_anls.py
│ │ │ ├── mean_fscorer.py
│ │ │ └── wtq_scorer.py
│ │ └── utils.py
│ └── evaluator.py
├── model_worker.py
├── mplug_docowl
│ ├── __init__.py
│ ├── constants.py
│ ├── conversation.py
│ ├── mm_utils.py
│ ├── model
│ │ ├── __init__.py
│ │ ├── builder.py
│ │ ├── configuration_mplug_docowl.py
│ │ ├── convert_mplug_docowl_weight_to_hf.py
│ │ ├── convert_mplug_docowl_weight_to_hf_v2.py
│ │ ├── modeling_attn_mask_utils.py
│ │ ├── modeling_llama2.py
│ │ ├── modeling_mplug_docowl.py
│ │ ├── utils.py
│ │ └── visual_encoder.py
│ ├── processor.py
│ ├── train
│ │ ├── llama_flash_attn_monkey_patch.py
│ │ ├── mplug_docowl_trainer.py
│ │ └── train_docowl.py
│ └── utils.py
└── scripts
│ ├── finetune_docowl.sh
│ ├── finetune_docowl_lora.sh
│ ├── zero2.json
│ ├── zero3.json
│ └── zero3_offload.json
├── DocOwl2
├── README.md
├── assets
│ ├── Paper-Arxiv-orange.svg
│ ├── docowl2_effiency_and_case.jpg
│ └── modelscope.png
├── docowl_benchmark_evaluate.py
└── evaluation
│ ├── benchmarks_eval.py
│ ├── dude_eval.py
│ ├── due_benchmarks_eval.py
│ ├── due_evaluator
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── __version__.py
│ │ ├── due_evaluator.py
│ │ ├── py.typed
│ │ ├── scorers
│ │ │ ├── __init__.py
│ │ │ ├── accuracy_scorer.py
│ │ │ ├── anls_scorer.py
│ │ │ ├── base_scorer.py
│ │ │ ├── fscorer.py
│ │ │ ├── geval_scorer.py
│ │ │ ├── group_anls.py
│ │ │ ├── mean_fscorer.py
│ │ │ └── wtq_scorer.py
│ │ └── utils.py
│ ├── evaluator.py
│ ├── mpdocvqa_eval.py
│ └── newsvideoqa_eval.py
├── LICENSE
├── PaperOwl
├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── assets
│ ├── Paper-Arxiv-orange.svg
│ ├── Paper-PDF-orange.svg
│ ├── data_process.png
│ ├── diagram_distribution.png
│ ├── intro_case.jpeg
│ └── paper_category.png
├── configs
│ └── sft
│ │ └── release.yaml
├── ds_config.json
├── mplug_owl
│ ├── __init__.py
│ ├── configuration_mplug_owl.py
│ ├── modeling_mplug_owl.py
│ ├── processing_mplug_owl.py
│ └── tokenization_mplug_owl.py
├── pipeline
│ ├── __init__.py
│ ├── data_utils
│ │ ├── __init__.py
│ │ ├── processors
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── caption_processor.py
│ │ │ ├── default_processor.py
│ │ │ └── doc_processor.py
│ │ ├── randaugment.py
│ │ ├── registry.py
│ │ └── xgpt3_dataset.py
│ ├── eval_utils
│ │ ├── __init__.py
│ │ ├── due_evaluator
│ │ │ ├── __init__.py
│ │ │ ├── __main__.py
│ │ │ ├── __version__.py
│ │ │ ├── due_evaluator.py
│ │ │ ├── py.typed
│ │ │ ├── scorers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── accuracy_scorer.py
│ │ │ │ ├── anls_scorer.py
│ │ │ │ ├── base_scorer.py
│ │ │ │ ├── fscorer.py
│ │ │ │ ├── geval_scorer.py
│ │ │ │ ├── group_anls.py
│ │ │ │ ├── mean_fscorer.py
│ │ │ │ └── wtq_scorer.py
│ │ │ └── utils.py
│ │ ├── run_evaluation.py
│ │ └── tools.py
│ ├── evaluation.py
│ ├── interface.py
│ ├── train.py
│ ├── trainer.py
│ └── utils.py
├── scripts
│ ├── train_it.sh
│ └── train_it_v100.sh
└── serve
│ ├── __init__.py
│ ├── conversation.py
│ ├── gradio_css.py
│ ├── gradio_patch.py
│ ├── io_utils.py
│ ├── model_utils.py
│ ├── model_worker.py
│ ├── serve_utils.py
│ └── web_server.py
├── README.md
├── TinyChart
├── README.md
├── app.py
├── assets
│ ├── Paper-Arxiv-orange.svg
│ ├── cases.png
│ └── perform_and_speed.png
├── images
│ ├── albums.png
│ ├── college.png
│ ├── diseases.png
│ ├── economy.png
│ ├── immigrants.png
│ ├── market.png
│ ├── sails.png
│ ├── sports.png
│ └── workers.png
├── inference.ipynb
├── pyproject.toml
├── scripts
│ ├── convert_model_config.py
│ ├── evaluate.sh
│ ├── merge_jsonl_sort.py
│ ├── split_jsonl_dataset.py
│ ├── train.sh
│ ├── vit_add_tome.py
│ └── zero3_offload_decay.json
└── tinychart
│ ├── __init__.py
│ ├── arguments.py
│ ├── constants.py
│ ├── conversation.py
│ ├── data
│ │ ├── __init__.py
│ │ ├── dataset.py
│ │ ├── preprocess
│ │ │ ├── __init__.py
│ │ │ ├── default.py
│ │ │ ├── phi.py
│ │ │ └── v1.py
│ │ └── process.py
│ ├── eval
│ │ ├── __init__.py
│ │ ├── eval_chart2table.py
│ │ ├── eval_chart2text.py
│ │ ├── eval_metric.py
│ │ ├── eval_model.py
│ │ ├── run_eval.py
│ │ └── run_tiny_chart.py
│ ├── mm_utils.py
│ ├── model
│ │ ├── __init__.py
│ │ ├── builder.py
│ │ ├── language_model
│ │ │ ├── __init__.py
│ │ │ └── llava_phi.py
│ │ ├── llava_arch.py
│ │ ├── model_factory.py
│ │ ├── multimodal_encoder
│ │ │ ├── builder.py
│ │ │ ├── merge.py
│ │ │ └── siglip_encoder.py
│ │ └── multimodal_projector
│ │ │ └── builder.py
│ ├── train
│ │ ├── __init__.py
│ │ ├── llava_trainer.py
│ │ ├── train.py
│ │ └── train_utils.py
│ └── utils.py
├── UReader
├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── app.py
├── assets
│ ├── -twitter-blue.svg
│ ├── Demo-ModelScope-brightgreen.svg
│ ├── LICENSE-Apache License-blue.svg
│ ├── Paper-Arxiv-orange.svg
│ ├── Paper-PDF-orange.svg
│ ├── intro_case.jpg
│ ├── model.png
│ ├── modelscope.png
│ └── modelscopeIcon.svg
├── configs
│ └── sft
│ │ └── release.yaml
├── ds_config.json
├── examples
│ ├── Yao_Ming.jpeg
│ ├── ca.jpeg
│ ├── docowl.jpg
│ ├── fridge.jpg
│ ├── fruits.jpg
│ ├── laundry.jpeg
│ ├── monalisa-fun.jpg
│ ├── monday.jpg
│ ├── mug_ad.jpeg
│ ├── owl.jpg
│ ├── rap.jpeg
│ ├── table.jpg
│ ├── titanic.jpeg
│ ├── vga.jpeg
│ └── website.jpg
├── mplug_owl
│ ├── __init__.py
│ ├── configuration_mplug_owl.py
│ ├── modeling_mplug_owl.py
│ ├── processing_mplug_owl.py
│ └── tokenization_mplug_owl.py
├── pipeline
│ ├── __init__.py
│ ├── data_utils
│ │ ├── __init__.py
│ │ ├── processors
│ │ │ ├── __init__.py
│ │ │ ├── builder.py
│ │ │ ├── caption_processor.py
│ │ │ ├── default_processor.py
│ │ │ └── doc_processor.py
│ │ ├── randaugment.py
│ │ ├── registry.py
│ │ └── xgpt3_dataset.py
│ ├── eval_utils
│ │ ├── __init__.py
│ │ ├── due_evaluator
│ │ │ ├── __init__.py
│ │ │ ├── __main__.py
│ │ │ ├── __version__.py
│ │ │ ├── due_evaluator.py
│ │ │ ├── py.typed
│ │ │ ├── scorers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── accuracy_scorer.py
│ │ │ │ ├── anls_scorer.py
│ │ │ │ ├── base_scorer.py
│ │ │ │ ├── fscorer.py
│ │ │ │ ├── geval_scorer.py
│ │ │ │ ├── group_anls.py
│ │ │ │ ├── mean_fscorer.py
│ │ │ │ └── wtq_scorer.py
│ │ │ └── utils.py
│ │ ├── run_evaluation.py
│ │ └── tools.py
│ ├── evaluation.py
│ ├── interface.py
│ ├── train.py
│ ├── trainer.py
│ └── utils.py
├── scripts
│ ├── eval
│ │ └── eval_benchmark.sh
│ ├── train_it.sh
│ └── train_it_v100.sh
└── serve
│ ├── __init__.py
│ ├── conversation.py
│ ├── gradio_css.py
│ ├── gradio_patch.py
│ ├── io_utils.py
│ ├── model_utils.py
│ ├── model_worker.py
│ ├── serve_utils.py
│ └── web_server.py
└── assets
├── docowl1.5_chat_case.png
├── docowl2_github_case.jpg
├── huggingface.png
├── mPLUG_new1.png
└── modelscope.png
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | #IDE
7 | .idea/
8 | .DS_Store
9 |
10 | #SOFA
11 | SOFA/build
12 | SOFA/sofa.egg-info
13 | SOFA/dist
14 |
15 | #Megatron
16 | megatron/fused_kernels/build/
17 |
18 | *.ipynb
19 | *.pth
20 | *.pt
21 | *.tar
22 | *.out
23 | *.log
24 | *.txt
25 | tensorboard/
26 | save_model/
27 |
28 | debug/
29 | language_evaluation
30 | evalcap
31 | .ipynb_checkpoints
32 |
33 | core-*
34 | .nfs*
35 |
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_0.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_13.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_15.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_16.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_20.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_26.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_26.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_31.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_31.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_32.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_34.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_35.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_35.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_37.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_37.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_41.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_41.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_44.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_44.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_46.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_46.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_49.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_49.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_57.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_69.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_69.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_76.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/chart_80.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_80.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_0.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_1.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_10.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_19.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_2.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_22.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_22.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_27.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_27.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_35.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_35.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_40.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_40.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_43.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_43.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_44.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_44.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_47.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_47.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_52.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_52.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_57.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_67.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_67.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_7.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_8.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_86.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_86.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_91.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_91.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/document_97.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_97.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_1.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_10.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_14.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_15.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_21.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_32.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_34.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_38.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_42.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_42.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_44.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_44.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_5.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_56.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_69.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_69.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_75.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_75.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_77.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_77.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_79.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_8.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_81.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_81.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_90.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_90.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/natural_95.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_95.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_1.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_11.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_25.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_25.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_27.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_27.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_28.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_28.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_30.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_31.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_31.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_34.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_4.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_44.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_44.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_47.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_47.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_72.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_77.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_77.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_8.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_83.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_83.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_87.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_87.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_91.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_91.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/screenshot_96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_96.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_0.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_13.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_17.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_29.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_29.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_39.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_42.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_42.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_50.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_54.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_54.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_56.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_57.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_60.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_7.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_73.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_73.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_75.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_75.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_76.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_79.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_87.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_87.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_89.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_89.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_97.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_97.png
--------------------------------------------------------------------------------
/DocOwl/DocLLM/images/table_98.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_98.png
--------------------------------------------------------------------------------
/DocOwl/assets/-twitter-blue.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/DocOwl/assets/Demo-ModelScope-brightgreen.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/DocOwl/assets/LICENSE-Apache License-blue.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/DocOwl/assets/Paper-Arxiv-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/DocOwl/assets/Paper-PDF-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/DocOwl/assets/cases_git.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/assets/cases_git.jpg
--------------------------------------------------------------------------------
/DocOwl/assets/mPLUG_new1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/assets/mPLUG_new1.png
--------------------------------------------------------------------------------
/DocOwl/assets/modelscopeIcon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/DocOwl/assets/overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/assets/overview.jpg
--------------------------------------------------------------------------------
/DocOwl1.5/assets/Paper-Arxiv-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/DocOwl1.5/assets/doc_instruct.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl1.5/assets/doc_instruct.png
--------------------------------------------------------------------------------
/DocOwl1.5/assets/modelscope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl1.5/assets/modelscope.png
--------------------------------------------------------------------------------
/DocOwl1.5/assets/radar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl1.5/assets/radar.png
--------------------------------------------------------------------------------
/DocOwl1.5/evaluation/due_evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | from .__main__ import cli_main
2 | from .due_evaluator import DueEvaluator
3 |
4 | __all__ = ['DueEvaluator', 'cli_main']
5 |
--------------------------------------------------------------------------------
/DocOwl1.5/evaluation/due_evaluator/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import sys
6 | from typing import Optional, Set
7 | import json
8 |
9 | from due_evaluator.due_evaluator import DueEvaluator
10 | from due_evaluator.utils import property_scores_to_string
11 |
12 |
13 | def parse_args():
14 | """Parse CLI arguments.
15 |
16 | Returns:
17 | namespace: namespace with parsed variables.
18 |
19 | """
20 | parser = argparse.ArgumentParser('Document Understanding Evaluator')
21 | parser.add_argument(
22 | '--out-files',
23 | '-o',
24 | type=argparse.FileType('r', encoding='utf-8'),
25 | required=True,
26 | nargs='+',
27 | help='Out file to evaluate',
28 | )
29 | parser.add_argument(
30 | '--reference', '-r', type=argparse.FileType('r', encoding='utf-8'), required=True, help='Reference file',
31 | )
32 | parser.add_argument('--metric', '-m', type=str, default='F1', choices=['F1', 'MEAN-F1', 'ANLS', 'WTQ', 'GROUP-ANLS'])
33 | parser.add_argument(
34 | '--return-score',
35 | default='F1',
36 | choices=['F1', 'mean-F1', 'ANLS', 'mean-Precision', 'mean-Recall', 'WTQ'],
37 | help='Return WR-like mean-F1 score',
38 | )
39 | parser.add_argument('--line-by-line', action='store_true', default=False, help='Return per-example results')
40 | parser.add_argument(
41 | '--columns', type=str, nargs='+', default=['Precision', 'Recall', 'F1'], help='Columns',
42 | )
43 | parser.add_argument(
44 | '--print-format',
45 | default='text',
46 | type=str,
47 | choices=['text', 'latex', 'json'],
48 | help='Print feature table in the given format',
49 | )
50 | parser.add_argument('--properties', nargs='+', type=str, help='Property set to limit evaluation to')
51 | parser.add_argument(
52 | '--ignore-case', '-i', action='store_true', default=False, help='Ignore case when comparing values',
53 | )
54 | return parser.parse_args()
55 |
56 |
57 | def cli_main(args: argparse.Namespace):
58 | """CLI main.
59 |
60 | Args:
61 | args: cli arguments
62 | """
63 | reference = [json.loads(line) for line in args.reference]
64 |
65 | evaluators = []
66 | for out_file in args.out_files:
67 | predictions = [json.loads(line) for line in out_file]
68 |
69 | property_set: Optional[Set[str]]
70 | if args.properties:
71 | property_set = args.properties
72 | else:
73 | property_set = None
74 |
75 | evaluators.append(
76 | DueEvaluator(reference, predictions, property_set, args.ignore_case, out_file.name, args.metric)
77 | )
78 |
79 | prop_str = property_scores_to_string(evaluators, args.print_format, args.columns)
80 | if args.print_format != 'json':
81 | print(prop_str, file=sys.stderr)
82 |
83 | if args.line_by_line:
84 | for idx, score in enumerate(evaluators[0].line_by_line()):
85 | print(f'{idx}: {score}', file=sys.stderr)
86 | return prop_str
87 |
88 |
89 | def main() -> None:
90 | """Main."""
91 | args = parse_args()
92 | cli_main(args)
93 |
94 |
95 | if __name__ == '__main__':
96 | main()
97 |
--------------------------------------------------------------------------------
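
The CLI above can be run from DocOwl1.5/evaluation (with dependencies such as pandas and textdistance installed) as, for example, "python -m due_evaluator -o predictions.jsonl -r reference.jsonl -m ANLS". The same evaluation can also be driven programmatically; the sketch below mirrors cli_main, assuming the DueEvaluator constructor takes the same positional arguments used there, and the JSONL file names and contents are placeholders in the DUE annotation format consumed by the scorers.

import json

from due_evaluator.due_evaluator import DueEvaluator
from due_evaluator.utils import property_scores_to_string

# Placeholder paths: one JSON object per line, each carrying 'name' and
# 'annotations' fields as expected by the scorers.
with open('reference.jsonl', encoding='utf-8') as f:
    reference = [json.loads(line) for line in f]
with open('predictions.jsonl', encoding='utf-8') as f:
    predictions = [json.loads(line) for line in f]

# Same positional arguments as in cli_main: reference, predictions,
# property set (None = all), ignore_case, a path label, and the metric name.
evaluator = DueEvaluator(reference, predictions, None, False, 'predictions.jsonl', 'ANLS')
print(property_scores_to_string([evaluator], 'text', ['Precision', 'Recall', 'F1']))
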
/DocOwl1.5/evaluation/due_evaluator/__version__.py:
--------------------------------------------------------------------------------
1 | """Version specification."""
2 |
3 | VERSION = (0, 0, 8)
4 | __version__ = '.'.join(map(str, VERSION))
5 |
--------------------------------------------------------------------------------
/DocOwl1.5/evaluation/due_evaluator/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl1.5/evaluation/due_evaluator/py.typed
--------------------------------------------------------------------------------
/DocOwl1.5/evaluation/due_evaluator/scorers/__init__.py:
--------------------------------------------------------------------------------
1 | from .anls_scorer import AnlsScorer
2 | from .base_scorer import BaseScorer
3 | from .fscorer import FScorer
4 | from .mean_fscorer import MeanFScorer
5 | from .wtq_scorer import WtqScorer
6 | from .group_anls import GroupAnlsScorer
7 | from .geval_scorer import GevalScorer
8 |
9 | __all__ = ['AnlsScorer', 'BaseScorer', 'FScorer', 'MeanFScorer', 'WtqScorer', 'GevalScorer', 'GroupAnlsScorer']
10 |
--------------------------------------------------------------------------------
/DocOwl1.5/evaluation/due_evaluator/scorers/accuracy_scorer.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List
3 | from operator import itemgetter
4 |
5 | from .base_scorer import BaseScorer
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | class AccuracyScorer(BaseScorer):
11 | """Accuracy Scorer."""
12 |
13 | def __init__(self, threshold: float = 0.5):
14 | self.__scores: List[float] = []
15 | self.threshold = threshold
16 |
17 | @property
18 | def scores(self):
19 | return self.__scores
20 |
21 | def check_denotation(self, out: list, ref: list) -> bool:
22 | return out == ref
23 |
24 | def add(self, out_items: List[dict], ref_items: List[dict]):
25 | """Add more items for computing corpus level scores.
26 |
27 | Args:
28 | out_items: outs from a single document (line)
29 | ref_items: reference of the evaluated document (line)
30 |
31 | """
32 | out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
33 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
34 | assert [a['key'] for a in out_ann] == [a['key'] for a in ref_ann]
35 |
36 | for out, ref in zip(out_ann, ref_ann):
37 | o_values = [v['value'] for v in out['values']]
38 | r_values = [v['value'] for v in ref['values']]
39 | score = int(self.check_denotation(o_values, r_values))
40 | self.__scores.append(score)
41 |
42 | def score(self) -> float:
43 | if self.__scores:
44 | return sum(self.__scores) / len(self.__scores)
45 | return 0.0
46 |
47 | @classmethod
48 | def support_feature_scores(cls) -> bool:
49 | return False
50 |
51 | @classmethod
52 | def metric_name(cls) -> str:
53 | return "Accuracy"
54 |
--------------------------------------------------------------------------------
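
A minimal usage sketch for AccuracyScorer, with the item layout inferred from add() above; the key and values are made-up examples, and the import assumes the evaluation directory is on sys.path so the package resolves as due_evaluator.

from due_evaluator.scorers.accuracy_scorer import AccuracyScorer

# One "item" is a single document line: an 'annotations' list whose entries
# carry a 'key' and a list of {'value': ...} dicts.
prediction = {'annotations': [{'key': 'total', 'values': [{'value': '42'}]}]}
reference = {'annotations': [{'key': 'total', 'values': [{'value': '42'}]}]}

scorer = AccuracyScorer()
scorer.add(prediction, reference)
print(scorer.score())  # 1.0 when the value lists match exactly, 0.0 otherwise
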
/DocOwl1.5/evaluation/due_evaluator/scorers/anls_scorer.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List
3 | from operator import itemgetter
4 |
5 | import textdistance
6 |
7 | from due_evaluator.scorers.base_scorer import BaseScorer
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | class AnlsScorer(BaseScorer):
13 | """ANLS Scorer."""
14 |
15 | def __init__(self, threshold: float = 0.5):
16 | self.__scores: List[float] = []
17 | self.threshold = threshold
18 |
19 | @property
20 | def scores(self):
21 | return self.__scores
22 |
23 | def add(self, out_items: List[dict], ref_items: List[dict]):
24 | """Add more items for computing corpus level scores.
25 |
26 | Args:
27 | out_items: outs from a single document (line)
28 | ref_items: reference of the evaluated document (line)
29 |
30 | """
31 | out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
32 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
33 | assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann]
34 |
35 | """try:
36 | # assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann]
37 | out_keys = [a['key'][:100] for a in out_ann]
38 | ref_keys = [a['key'][:100] for a in ref_ann]
39 | # assert out_keys == ref_keys
40 | for i in range(len(out_keys)):
41 | try:
42 | assert out_keys[i] == ref_keys[i]
43 | except AssertionError as e:
44 | print(out_keys[i])
45 | print(ref_keys[i])
46 | print('==============')
47 | # exit(0)
48 |
49 | except AssertionError as e:
50 | print('key of pred and gt unmatched:')
51 | # print('pred:', out_keys)
52 | # print('gt:', ref_keys)
53 | exit(0)"""
54 |
55 | for out, ref in zip(out_ann, ref_ann):
56 | assert len(out['values']) == 1
57 | val = out['values'][0]['value']
58 | possible_vals = ref['values'][0]['value_variants']
59 | best_score = max([textdistance.levenshtein.normalized_similarity(val, pos)
60 | for pos in possible_vals])
61 | if 1 - self.threshold >= best_score:
62 | best_score = 0.0
63 | self.__scores.append(best_score)
64 |
65 | def score(self) -> float:
66 | if self.__scores:
67 | return sum(self.__scores) / len(self.__scores)
68 | return 0.0
69 |
70 | @classmethod
71 | def support_feature_scores(cls) -> bool:
72 | return False
73 |
74 | @classmethod
75 | def metric_name(cls) -> str:
76 | return "ANLS"
77 |
--------------------------------------------------------------------------------
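
A similar sketch for AnlsScorer (textdistance must be installed). Note that add() requires exactly one predicted value per key and reads the accepted answers from 'value_variants' on the reference side; the data below is illustrative only.

from due_evaluator.scorers.anls_scorer import AnlsScorer

# The prediction carries one value per key; the reference lists the accepted
# strings under 'value_variants'.
prediction = {'annotations': [{'key': 'date', 'values': [{'value': '12 May 2020'}]}]}
reference = {'annotations': [{'key': 'date', 'values': [{'value_variants': ['12 May 2020', 'May 12, 2020']}]}]}

scorer = AnlsScorer()  # default threshold 0.5
scorer.add(prediction, reference)
print(scorer.score())  # 1.0 here; best similarities at or below 0.5 are clipped to 0
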
/DocOwl1.5/evaluation/due_evaluator/scorers/base_scorer.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import List
3 |
4 |
5 | class BaseScorer(abc.ABC):
6 | """Abstract class for scorers."""
7 |
8 | @abc.abstractmethod
9 | def add(self, out_items: List[dict], ref_items: List[dict]):
10 | pass
11 |
12 | @abc.abstractmethod
13 | def score(self):
14 | pass
15 |
16 | @abc.abstractclassmethod
17 | def support_feature_scores(cls) -> bool:
18 | pass
19 |
20 | @abc.abstractclassmethod
21 | def metric_name(cls) -> str:
22 | pass
23 |
--------------------------------------------------------------------------------
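
New metrics are added by subclassing BaseScorer and implementing the four methods above. The class below is a hypothetical exact-match scorer, not part of the repository, using the same item layout as AccuracyScorer.

from operator import itemgetter
from typing import List

from due_evaluator.scorers.base_scorer import BaseScorer


class ExactMatchScorer(BaseScorer):
    """Hypothetical example: counts a key as correct only on an exact first-value match."""

    def __init__(self):
        self._scores: List[float] = []

    def add(self, out_items: dict, ref_items: dict):
        out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
        ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
        for out, ref in zip(out_ann, ref_ann):
            self._scores.append(float(out['values'][0]['value'] == ref['values'][0]['value']))

    def score(self) -> float:
        return sum(self._scores) / len(self._scores) if self._scores else 0.0

    @classmethod
    def support_feature_scores(cls) -> bool:
        return False

    @classmethod
    def metric_name(cls) -> str:
        return 'ExactMatch'
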
/DocOwl1.5/evaluation/due_evaluator/scorers/geval_scorer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | import tempfile
3 | from collections import defaultdict
4 | import os
5 |
6 | from due_evaluator.scorers.fscorer import FScorer
7 | from due_evaluator.scorers.base_scorer import BaseScorer
8 |
9 |
10 | GEVAL_BINARY = os.getenv('GEVAL_BINARY', '/data/shared/bin/geval')
11 | GEVAL_METRIC = os.getenv('GEVAL_METRIC', 'MultiLabel-F1:cN')
12 |
13 |
14 | class GevalScorer(BaseScorer):
15 | def __init__(self):
16 | self.__ref = tempfile.NamedTemporaryFile('w+t')
17 | self.__out = tempfile.NamedTemporaryFile('w+t')
18 | self.__ref_data = defaultdict(set)
19 | self.__out_data = defaultdict(set)
20 |
21 | @staticmethod
22 | def add_to_geval_data(data, line):
23 | name = line['name']
24 | for annotation in line['annotations']:
25 | for idx, val in enumerate(annotation['values'], 1):
26 | for child in val['children']:
27 | new_name = child['key'] + '__' + str(idx) if '__' in child['key'] else child['key']
28 | if child['values'] and child['values'] != ['']:
29 | new_value = '|'.join([v['value'].replace(' ', '_') for v in child['values']])
30 | data[name].add(f'{new_name}={new_value}')
31 |
32 | def save_geval_files(self):
33 | for name in sorted(self.__ref_data.keys()):
34 | self.__ref.write(' '.join(self.__ref_data[name]) + '\n')
35 | self.__out.write(' '.join(self.__out_data[name]) + '\n')
36 |
37 | def add(self, out_items: List[str], ref_items: List[str]):
38 | self.add_to_geval_data(self.__out_data, out_items)
39 | self.add_to_geval_data(self.__ref_data, ref_items)
40 |
41 | def support_feature_scores(cls) -> bool:
42 | return False
43 |
44 | def metric_name(cls) -> str:
45 | return "GEVAL"
46 |
47 | def run_geval(self):
48 | self.__ref.flush()
49 | self.__out.flush()
50 | try:
51 | return float(os.popen(f'{GEVAL_BINARY} -o {self.__out.name} -e {self.__ref.name} --metric {GEVAL_METRIC}').read())
52 | except:
53 | return -1
54 |
55 | def score(self) -> float:
56 | self.save_geval_files()
57 | return self.run_geval()
58 |
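GevalScorer shells out to an external geval binary; the binary path and metric string come from the environment, with the defaults above as fallbacks. Because the module reads them at import time, they must be set beforehand. A minimal sketch (the binary path is a placeholder):

import os

# must run before importing due_evaluator.scorers.geval_scorer
os.environ['GEVAL_BINARY'] = '/usr/local/bin/geval'   # placeholder path
os.environ['GEVAL_METRIC'] = 'MultiLabel-F1:cN'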
--------------------------------------------------------------------------------
/DocOwl1.5/evaluation/due_evaluator/scorers/mean_fscorer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from due_evaluator.scorers.fscorer import FScorer
4 | from due_evaluator.scorers.base_scorer import BaseScorer
5 |
6 |
7 | class MeanFScorer(BaseScorer):
8 | def __init__(self):
9 | self.__scores: List[float] = []
10 |
11 | def add(self, out_items: List[str], ref_items: List[str]):
12 | fscorer = FScorer()
13 | fscorer.add(out_items, ref_items)
14 | self.__scores.append(fscorer.f_score())
15 |
16 | def support_feature_scores(cls) -> bool:
17 | return False
18 |
19 | def metric_name(cls) -> str:
20 | return "MEAN-F1"
21 |
22 | def score(self) -> float:
23 | if self.__scores:
24 | return sum(self.__scores) / len(self.__scores)
25 | return 0.0
26 |
--------------------------------------------------------------------------------
/DocOwl1.5/evaluation/due_evaluator/utils.py:
--------------------------------------------------------------------------------
1 | from due_evaluator.scorers.fscorer import FScorer
2 | from typing import Dict, List, Optional, Sequence, Union
3 |
4 | import pandas as pd
5 |
6 | from due_evaluator.due_evaluator import DueEvaluator
7 |
8 |
9 | def dataframe_to_print(df: pd.DataFrame, print_format: Optional[str] = 'text') -> str:
10 | """Export dataframe to json or plain text.
11 |
12 | Args:
13 | df (pd.DataFrame): data
14 | print_format (str, optional): Print format. Defaults to 'text'.
15 |
16 | Raises:
17 | ValueError: unknown print_format
18 |
19 | Returns:
20 | str: printed version of dataframe
21 |
22 | """
23 | out: str
24 | if print_format == 'latex':
25 | out = df.reset_index().to_latex(index=False)
26 | elif print_format == 'text':
27 | out = df.reset_index().to_string(index=False)
28 | elif print_format == 'json':
29 | out = df.to_json(orient='index')
30 | else:
31 | raise ValueError()
32 | return out
33 |
34 |
35 | def property_scores_to_string(
36 | dues: List[DueEvaluator], print_format: str = 'text', columns: Sequence[str] = ('Precision', 'Recall', 'F-1'),
37 | ) -> str:
38 | """Print out scores per property.
39 |
40 | Args:
41 | dues: List of DueEvaluators
42 |         print_format: output format: text, latex or json
43 | columns: a list of metrics to print
44 |
45 | Returns:
46 | str: string table with feature scores.
47 |
48 | """
49 | data = []
50 | for property_name in sorted(dues[0].property_scorers.keys()) + ['ALL']:
51 | row_data: Dict[str, Union[str, float]] = {}
52 | row_data['Label'] = property_name
53 | for due in dues:
54 | if len(dues) == 1:
55 | suffix = ''
56 | else:
57 | suffix = f' ({due.path})'
58 | if property_name == 'ALL':
59 | scorer = due.general_scorer
60 | else:
61 | scorer = due.property_scorers[property_name]
62 |
63 | row_data[scorer.metric_name() + suffix] = scorer.score()
64 | if isinstance(scorer, FScorer):
65 | if 'Precision' in columns:
66 | row_data['Precision' + suffix] = scorer.precision()
67 | if 'Recall' in columns:
68 | row_data['Recall' + suffix] = scorer.recall()
69 | data.append(row_data)
70 |
71 | df = pd.DataFrame(data)
72 | df.set_index('Label', drop=True, inplace=True)
73 |
74 | return dataframe_to_print(df, print_format)
75 |
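A small usage sketch for dataframe_to_print; the frame below is made up and mirrors the Label-indexed tables produced by property_scores_to_string:

import pandas as pd

from due_evaluator.utils import dataframe_to_print

df = pd.DataFrame(
    {'ANLS': [0.71, 0.65]},
    index=pd.Index(['DocVQA', 'InfoVQA'], name='Label'),
)
print(dataframe_to_print(df, 'text'))  # plain-text table
print(dataframe_to_print(df, 'json'))  # {"DocVQA":{"ANLS":0.71},"InfoVQA":{"ANLS":0.65}}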
--------------------------------------------------------------------------------
/DocOwl1.5/mplug_docowl/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import MPLUGDocOwlLlamaForCausalLM
2 | from .processor import DocProcessor
--------------------------------------------------------------------------------
/DocOwl1.5/mplug_docowl/constants.py:
--------------------------------------------------------------------------------
1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30
2 | WORKER_HEART_BEAT_INTERVAL = 15
3 |
4 | LOGDIR = "./demo_logs"
5 |
6 | # Model Constants
7 | IGNORE_INDEX = -100
8 | IMAGE_TOKEN_INDEX = -200
9 | DEFAULT_IMAGE_TOKEN = "<|image|>"
10 |
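A sketch of how constants like these are commonly wired into preprocessing (this is not the repository's actual code): IMAGE_TOKEN_INDEX marks where visual features get spliced into the token sequence, and IGNORE_INDEX masks prompt positions out of the language-modeling loss, matching PyTorch's default CrossEntropyLoss ignore_index of -100.

import torch

IGNORE_INDEX = -100
IMAGE_TOKEN_INDEX = -200

# hypothetical token ids: one image placeholder, then prompt and answer tokens
input_ids = torch.tensor([IMAGE_TOKEN_INDEX, 12, 57, 903, 15, 2])
labels = input_ids.clone()
labels[:4] = IGNORE_INDEX  # image slot and prompt contribute nothing to the loss
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)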
--------------------------------------------------------------------------------
/DocOwl1.5/mplug_docowl/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .modeling_mplug_docowl import MPLUGDocOwlLlamaForCausalLM
2 | from .configuration_mplug_docowl import MPLUGDocOwlConfig
--------------------------------------------------------------------------------
/DocOwl1.5/mplug_docowl/model/utils.py:
--------------------------------------------------------------------------------
1 | from transformers import AutoConfig
2 |
3 |
4 | def auto_upgrade(config):
5 | cfg = AutoConfig.from_pretrained(config)
6 | if 'mplug_owl2' in config and 'mplug_owl2' not in cfg.model_type:
7 | assert cfg.model_type == 'mplug_owl2'
8 |         print("You are using a newer LLaVA code base, while the v0 checkpoint is from an older code base.")
9 | print("You must upgrade the checkpoint to the new code base (this can be done automatically).")
10 | confirm = input("Please confirm that you want to upgrade the checkpoint. [Y/N]")
11 | if confirm.lower() in ["y", "yes"]:
12 | print("Upgrading checkpoint...")
13 | assert len(cfg.architectures) == 1
14 | setattr(cfg.__class__, "model_type", "mplug_owl2")
15 | cfg.architectures[0] = 'LlavaLlamaForCausalLM'
16 | cfg.save_pretrained(config)
17 | print("Checkpoint upgraded.")
18 | else:
19 | print("Checkpoint upgrade aborted.")
20 | exit(1)
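Usage is just the checkpoint directory: when the path contains 'mplug_owl2' but the stored config's model_type does not, the function asks for confirmation and rewrites the config in place. A sketch with a placeholder path:

from mplug_docowl.model.utils import auto_upgrade

auto_upgrade('./checkpoints/mplug_owl2-v0-checkpoint')  # placeholder path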
--------------------------------------------------------------------------------
/DocOwl1.5/scripts/finetune_docowl.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | if [ $MASTER_ADDR ];then
3 | echo $MASTER_ADDR
4 | echo $MASTER_PORT
5 | echo $WORLD_SIZE
6 | echo $RANK
7 | else
8 | MASTER_ADDR=127.0.0.1
9 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
10 | WORLD_SIZE=1
11 | RANK=0
12 | fi
13 | # Change for multinode config
14 | NNODES=${WORLD_SIZE}
15 | NODE_RANK=${RANK}
16 | GPUS_PER_NODE=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
17 | # GPUS_PER_NODE=1
18 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
19 | echo $DISTRIBUTED_ARGS
20 |
21 | # change LOAD to your local path of DocOwl1.5-stage1
22 | LOAD='./mPLUG/DocOwl1.5-stage1'
23 |
24 | # batch size = per_device_train_batch_size x GPUS_PER_NODE x NNODES x gradient_accumulation_steps
25 | DATA_FILE=./DocDownstream-1.0/train.jsonl
26 | torchrun $DISTRIBUTED_ARGS mplug_docowl/train/train_docowl.py \
27 | --deepspeed ./scripts/zero2.json \
28 | --model_name_or_path $LOAD \
29 | --version v1 \
30 | --data_path $DATA_FILE \
31 | --image_folder './DocDownstream-1.0/' \
32 | --image_size 448 \
33 | --crop_anchors 'grid_9' \
34 | --add_global_img True \
35 | --add_textual_crop_indicator True \
36 | --bf16 True \
37 | --output_dir ./checkpoints/docowl1.5 \
38 | --num_train_epochs 3 \
39 | --per_device_train_batch_size 1 \
40 | --per_device_eval_batch_size 1 \
41 | --gradient_accumulation_steps 8 \
42 | --evaluation_strategy "no" \
43 | --save_strategy "steps" \
44 | --save_steps 500 \
45 | --save_total_limit 4 \
46 | --learning_rate 2e-5 \
47 | --weight_decay 0. \
48 | --warmup_ratio 0.03 \
49 | --lr_scheduler_type "cosine" \
50 | --logging_steps 1 \
51 | --tf32 True \
52 | --model_max_length 3600 \
53 | --gradient_checkpointing True \
54 | --tune_vision2text True \
55 | --freeze_vision_model True \
56 | --freeze_backbone False \
57 | --dataloader_num_workers 4 \
58 | --lazy_preprocess True \
59 | --report_to tensorboard
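As the comment in the script notes, the effective batch size is per_device_train_batch_size x GPUS_PER_NODE x NNODES x gradient_accumulation_steps. A quick check with the values above, assuming a single node with 8 GPUs (the script itself counts GPUs via nvidia-smi):

per_device_train_batch_size = 1
gpus_per_node = 8   # assumption; detected at runtime in the script
nnodes = 1
gradient_accumulation_steps = 8

effective_batch_size = (per_device_train_batch_size * gpus_per_node
                        * nnodes * gradient_accumulation_steps)
print(effective_batch_size)  # 64 under these assumptions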
--------------------------------------------------------------------------------
/DocOwl1.5/scripts/finetune_docowl_lora.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | if [ $MASTER_ADDR ];then
3 | echo $MASTER_ADDR
4 | echo $MASTER_PORT
5 | echo $WORLD_SIZE
6 | echo $RANK
7 | else
8 | MASTER_ADDR=127.0.0.1
9 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
10 | WORLD_SIZE=1
11 | RANK=0
12 | fi
13 | # Change for multinode config
14 | NNODES=${WORLD_SIZE}
15 | NODE_RANK=${RANK}
16 | GPUS_PER_NODE=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
17 | # GPUS_PER_NODE=1
18 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
19 | echo $DISTRIBUTED_ARGS
20 |
21 | # change LOAD to your local path of DocOwl1.5-stage1
22 | LOAD='./mPLUG/DocOwl1.5-stage1'
23 |
24 | # batch size = per_device_train_batch_size x GPUS_PER_NODE x NNODES x gradient_accumulation_steps
25 | DATA_FILE=./DocDownstream-1.0/train.jsonl
26 | torchrun $DISTRIBUTED_ARGS mplug_docowl/train/train_docowl.py \
27 | --lora_enable True --lora_r 128 --lora_alpha 256 --vision2text_lr 2e-5 \
28 | --deepspeed ./scripts/zero2.json \
29 | --model_name_or_path $LOAD \
30 | --version v1 \
31 | --data_path $DATA_FILE \
32 | --image_folder './DocDownstream-1.0/' \
33 | --image_size 448 \
34 | --crop_anchors 'grid_9' \
35 | --add_global_img True \
36 | --add_textual_crop_indicator True \
37 | --bf16 True \
38 | --output_dir ./checkpoints/docowl1.5-lora \
39 | --num_train_epochs 3 \
40 | --per_device_train_batch_size 1 \
41 | --per_device_eval_batch_size 1 \
42 | --gradient_accumulation_steps 8 \
43 | --evaluation_strategy "no" \
44 | --save_strategy "steps" \
45 | --save_steps 500 \
46 | --save_total_limit 4 \
47 | --learning_rate 1e-4 \
48 | --weight_decay 0. \
49 | --warmup_ratio 0.03 \
50 | --lr_scheduler_type "cosine" \
51 | --logging_steps 1 \
52 | --tf32 True \
53 | --model_max_length 3600 \
54 | --gradient_checkpointing True \
55 | --tune_vision2text True \
56 | --freeze_vision_model True \
57 | --freeze_backbone True \
58 | --dataloader_num_workers 4 \
59 | --lazy_preprocess True \
60 | --report_to tensorboard
--------------------------------------------------------------------------------
/DocOwl1.5/scripts/zero2.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "train_micro_batch_size_per_gpu": "auto",
14 | "train_batch_size": "auto",
15 | "gradient_accumulation_steps": "auto",
16 | "zero_optimization": {
17 | "stage": 2,
18 | "overlap_comm": true,
19 | "contiguous_gradients": true,
20 | "sub_group_size": 1e9,
21 | "reduce_bucket_size": "auto"
22 | }
23 | }
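The "auto" entries above are resolved by the HuggingFace Trainer's DeepSpeed integration from its own TrainingArguments when the file is passed via --deepspeed, so batch size and precision stay controlled by the launch script. A minimal sketch of that wiring (model, data and most arguments omitted):

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir='./checkpoints/docowl1.5',
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    bf16=True,
    deepspeed='./scripts/zero2.json',  # "auto" fields are filled from these arguments
)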
--------------------------------------------------------------------------------
/DocOwl1.5/scripts/zero3.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "train_micro_batch_size_per_gpu": "auto",
14 | "train_batch_size": "auto",
15 | "gradient_accumulation_steps": "auto",
16 | "zero_optimization": {
17 | "stage": 3,
18 | "overlap_comm": true,
19 | "contiguous_gradients": true,
20 | "sub_group_size": 1e9,
21 | "reduce_bucket_size": "auto",
22 | "stage3_param_persistence_threshold": "auto",
23 | "stage3_max_live_parameters": 1e9,
24 | "stage3_max_reuse_distance": 1e9,
25 | "stage3_prefetch_bucket_size": "auto",
26 | "stage3_gather_16bit_weights_on_model_save": true
27 | }
28 | }
--------------------------------------------------------------------------------
/DocOwl1.5/scripts/zero3_offload.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "optimizer": {
14 | "type": "AdamW",
15 | "params": {
16 | "lr": "auto",
17 | "betas": "auto",
18 | "eps": "auto",
19 | "weight_decay": "auto"
20 | }
21 | },
22 | "scheduler": {
23 | "type": "WarmupLR",
24 | "params": {
25 | "warmup_min_lr": "auto",
26 | "warmup_max_lr": "auto",
27 | "warmup_num_steps": "auto"
28 | }
29 | },
30 | "zero_optimization": {
31 | "stage": 3,
32 | "offload_optimizer": {
33 | "device": "cpu",
34 | "pin_memory": true
35 | },
36 | "offload_param": {
37 | "device": "cpu",
38 | "pin_memory": true
39 | },
40 | "overlap_comm": true,
41 | "contiguous_gradients": true,
42 | "sub_group_size": 1e9,
43 | "reduce_bucket_size": "auto",
44 | "stage3_prefetch_bucket_size": "auto",
45 | "stage3_param_persistence_threshold": "auto",
46 | "stage3_max_live_parameters": 1e9,
47 | "stage3_max_reuse_distance": 1e9,
48 | "gather_16bit_weights_on_model_save": true
49 | },
50 | "gradient_accumulation_steps": "auto",
51 | "gradient_clipping": "auto",
52 | "train_batch_size": "auto",
53 | "train_micro_batch_size_per_gpu": "auto",
54 | "steps_per_print": 1e5,
55 | "wall_clock_breakdown": false
56 | }
--------------------------------------------------------------------------------
/DocOwl2/assets/Paper-Arxiv-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/DocOwl2/assets/docowl2_effiency_and_case.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl2/assets/docowl2_effiency_and_case.jpg
--------------------------------------------------------------------------------
/DocOwl2/assets/modelscope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl2/assets/modelscope.png
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | from .__main__ import cli_main
2 | from .due_evaluator import DueEvaluator
3 |
4 | __all__ = ['DueEvaluator', 'cli_main']
5 |
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/__version__.py:
--------------------------------------------------------------------------------
1 | """Version specification."""
2 |
3 | VERSION = (0, 0, 8)
4 | __version__ = '.'.join(map(str, VERSION))
5 |
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl2/evaluation/due_evaluator/py.typed
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/scorers/__init__.py:
--------------------------------------------------------------------------------
1 | from .anls_scorer import AnlsScorer
2 | from .base_scorer import BaseScorer
3 | from .fscorer import FScorer
4 | from .mean_fscorer import MeanFScorer
5 | from .wtq_scorer import WtqScorer
6 | from .group_anls import GroupAnlsScorer
7 | from .geval_scorer import GevalScorer
8 |
9 | __all__ = ['AnlsScorer', 'BaseScorer', 'FScorer', 'MeanFScorer', 'WtqScorer', 'GevalScorer', 'GroupAnlsScorer']
10 |
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/scorers/accuracy_scorer.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List
3 | from operator import itemgetter
4 |
5 | from .base_scorer import BaseScorer
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | class AccuracyScorer(BaseScorer):
11 | """Accuracy Scorer."""
12 |
13 | def __init__(self, threshold: float = 0.5):
14 | self.__scores: List[float] = []
15 | self.threshold = threshold
16 |
17 | @property
18 | def scores(self):
19 | return self.__scores
20 |
21 | def check_denotation(self, out: list, ref: list) -> bool:
22 | return out == ref
23 |
24 | def add(self, out_items: List[dict], ref_items: List[dict]):
25 | """Add more items for computing corpus level scores.
26 |
27 | Args:
28 | out_items: outs from a single document (line)
29 | ref_items: reference of the evaluated document (line)
30 |
31 | """
32 | out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
33 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
34 | assert [a['key'] for a in out_ann] == [a['key'] for a in ref_ann]
35 |
36 | for out, ref in zip(out_ann, ref_ann):
37 | o_values = [v['value'] for v in out['values']]
38 | r_values = [v['value'] for v in ref['values']]
39 | score = int(self.check_denotation(o_values, r_values))
40 | self.__scores.append(score)
41 |
42 | def score(self) -> float:
43 | if self.__scores:
44 | return sum(self.__scores) / len(self.__scores)
45 | return 0.0
46 |
47 | @classmethod
48 | def support_feature_scores(cls) -> bool:
49 | return False
50 |
51 | @classmethod
52 | def metric_name(cls) -> str:
53 | return "Accuracy"
54 |
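Scorers such as AccuracyScorer and AnlsScorer consume one prediction/reference pair per document: a dict with an 'annotations' list, each entry keyed by the question and carrying a list of values. A sketch of that shape fed through AccuracyScorer (field contents are made up):

from due_evaluator.scorers.accuracy_scorer import AccuracyScorer

scorer = AccuracyScorer()

prediction = {'name': 'doc_0', 'annotations': [
    {'key': 'What is the total?', 'values': [{'value': '42'}]},
]}
reference = {'name': 'doc_0', 'annotations': [
    {'key': 'What is the total?', 'values': [{'value': '42'}]},
]}

scorer.add(prediction, reference)
print(scorer.score())  # 1.0 for an exact match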
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/scorers/anls_scorer.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List
3 | from operator import itemgetter
4 |
5 | import textdistance
6 |
7 | from due_evaluator.scorers.base_scorer import BaseScorer
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | class AnlsScorer(BaseScorer):
13 |     """ANLS Scorer."""
14 |
15 | def __init__(self, threshold: float = 0.5):
16 | self.__scores: List[float] = []
17 | self.threshold = threshold
18 |
19 | @property
20 | def scores(self):
21 | return self.__scores
22 |
23 | def add(self, out_items: List[dict], ref_items: List[dict]):
24 | """Add more items for computing corpus level scores.
25 |
26 | Args:
27 | out_items: outs from a single document (line)
28 | ref_items: reference of the evaluated document (line)
29 |
30 | """
31 | out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
32 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
33 | assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann]
34 |
35 | """try:
36 | # assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann]
37 | out_keys = [a['key'][:100] for a in out_ann]
38 | ref_keys = [a['key'][:100] for a in ref_ann]
39 | # assert out_keys == ref_keys
40 | for i in range(len(out_keys)):
41 | try:
42 | assert out_keys[i] == ref_keys[i]
43 | except AssertionError as e:
44 | print(out_keys[i])
45 | print(ref_keys[i])
46 | print('==============')
47 | # exit(0)
48 |
49 | except AssertionError as e:
50 | print('key of pred and gt unmatched:')
51 | # print('pred:', out_keys)
52 | # print('gt:', ref_keys)
53 | exit(0)"""
54 |
55 | for out, ref in zip(out_ann, ref_ann):
56 | assert len(out['values']) == 1
57 | val = out['values'][0]['value']
58 | possible_vals = ref['values'][0]['value_variants']
59 | best_score = max([textdistance.levenshtein.normalized_similarity(val, pos)
60 | for pos in possible_vals])
61 | if 1 - self.threshold >= best_score:
62 | best_score = 0.0
63 | self.__scores.append(best_score)
64 |
65 | def score(self) -> float:
66 | if self.__scores:
67 | return sum(self.__scores) / len(self.__scores)
68 | return 0.0
69 |
70 | @classmethod
71 | def support_feature_scores(cls) -> bool:
72 | return False
73 |
74 | @classmethod
75 | def metric_name(cls) -> str:
76 | return "ANLS"
77 |
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/scorers/base_scorer.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import List
3 |
4 |
5 | class BaseScorer(abc.ABC):
6 | """Abstract class for scorers."""
7 |
8 | @abc.abstractmethod
9 | def add(self, out_items: List[dict], ref_items: List[dict]):
10 | pass
11 |
12 | @abc.abstractmethod
13 | def score(self):
14 | pass
15 |
16 | @abc.abstractclassmethod
17 | def support_feature_scores(cls) -> bool:
18 | pass
19 |
20 | @abc.abstractclassmethod
21 | def metric_name(cls) -> str:
22 | pass
23 |
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/scorers/geval_scorer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | import tempfile
3 | from collections import defaultdict
4 | import os
5 |
6 | from due_evaluator.scorers.fscorer import FScorer
7 | from due_evaluator.scorers.base_scorer import BaseScorer
8 |
9 |
10 | GEVAL_BINARY = os.getenv('GEVAL_BINARY', '/data/shared/bin/geval')
11 | GEVAL_METRIC = os.getenv('GEVAL_METRIC', 'MultiLabel-F1:cN')
12 |
13 |
14 | class GevalScorer(BaseScorer):
15 | def __init__(self):
16 | self.__ref = tempfile.NamedTemporaryFile('w+t')
17 | self.__out = tempfile.NamedTemporaryFile('w+t')
18 | self.__ref_data = defaultdict(set)
19 | self.__out_data = defaultdict(set)
20 |
21 | @staticmethod
22 | def add_to_geval_data(data, line):
23 | name = line['name']
24 | for annotation in line['annotations']:
25 | for idx, val in enumerate(annotation['values'], 1):
26 | for child in val['children']:
27 | new_name = child['key'] + '__' + str(idx) if '__' in child['key'] else child['key']
28 | if child['values'] and child['values'] != ['']:
29 | new_value = '|'.join([v['value'].replace(' ', '_') for v in child['values']])
30 | data[name].add(f'{new_name}={new_value}')
31 |
32 | def save_geval_files(self):
33 | for name in sorted(self.__ref_data.keys()):
34 | self.__ref.write(' '.join(self.__ref_data[name]) + '\n')
35 | self.__out.write(' '.join(self.__out_data[name]) + '\n')
36 |
37 | def add(self, out_items: List[str], ref_items: List[str]):
38 | self.add_to_geval_data(self.__out_data, out_items)
39 | self.add_to_geval_data(self.__ref_data, ref_items)
40 |
41 | def support_feature_scores(cls) -> bool:
42 | return False
43 |
44 | def metric_name(cls) -> str:
45 | return "GEVAL"
46 |
47 | def run_geval(self):
48 | self.__ref.flush()
49 | self.__out.flush()
50 | try:
51 | return float(os.popen(f'{GEVAL_BINARY} -o {self.__out.name} -e {self.__ref.name} --metric {GEVAL_METRIC}').read())
52 | except:
53 | return -1
54 |
55 | def score(self) -> float:
56 | self.save_geval_files()
57 | return self.run_geval()
58 |
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/scorers/mean_fscorer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from due_evaluator.scorers.fscorer import FScorer
4 | from due_evaluator.scorers.base_scorer import BaseScorer
5 |
6 |
7 | class MeanFScorer(BaseScorer):
8 | def __init__(self):
9 | self.__scores: List[float] = []
10 |
11 | def add(self, out_items: List[str], ref_items: List[str]):
12 | fscorer = FScorer()
13 | fscorer.add(out_items, ref_items)
14 | self.__scores.append(fscorer.f_score())
15 |
16 | def support_feature_scores(cls) -> bool:
17 | return False
18 |
19 | def metric_name(cls) -> str:
20 | return "MEAN-F1"
21 |
22 | def score(self) -> float:
23 | if self.__scores:
24 | return sum(self.__scores) / len(self.__scores)
25 | return 0.0
26 |
--------------------------------------------------------------------------------
/DocOwl2/evaluation/due_evaluator/utils.py:
--------------------------------------------------------------------------------
1 | from due_evaluator.scorers.fscorer import FScorer
2 | from typing import Dict, List, Optional, Sequence, Union
3 |
4 | import pandas as pd
5 |
6 | from due_evaluator.due_evaluator import DueEvaluator
7 |
8 |
9 | def dataframe_to_print(df: pd.DataFrame, print_format: Optional[str] = 'text') -> str:
10 | """Export dataframe to json or plain text.
11 |
12 | Args:
13 | df (pd.DataFrame): data
14 | print_format (str, optional): Print format. Defaults to 'text'.
15 |
16 | Raises:
17 | ValueError: unknown print_format
18 |
19 | Returns:
20 | str: printed version of dataframe
21 |
22 | """
23 | out: str
24 | if print_format == 'latex':
25 | out = df.reset_index().to_latex(index=False)
26 | elif print_format == 'text':
27 | out = df.reset_index().to_string(index=False)
28 | elif print_format == 'json':
29 | out = df.to_json(orient='index')
30 | else:
31 | raise ValueError()
32 | return out
33 |
34 |
35 | def property_scores_to_string(
36 | dues: List[DueEvaluator], print_format: str = 'text', columns: Sequence[str] = ('Precision', 'Recall', 'F-1'),
37 | ) -> str:
38 | """Print out scores per property.
39 |
40 | Args:
41 | dues: List of DueEvaluators
42 |         print_format: output format: text, latex or json
43 | columns: a list of metrics to print
44 |
45 | Returns:
46 | str: string table with feature scores.
47 |
48 | """
49 | data = []
50 | for property_name in sorted(dues[0].property_scorers.keys()) + ['ALL']:
51 | row_data: Dict[str, Union[str, float]] = {}
52 | row_data['Label'] = property_name
53 | for due in dues:
54 | if len(dues) == 1:
55 | suffix = ''
56 | else:
57 | suffix = f' ({due.path})'
58 | if property_name == 'ALL':
59 | scorer = due.general_scorer
60 | else:
61 | scorer = due.property_scorers[property_name]
62 |
63 | row_data[scorer.metric_name() + suffix] = scorer.score()
64 | if isinstance(scorer, FScorer):
65 | if 'Precision' in columns:
66 | row_data['Precision' + suffix] = scorer.precision()
67 | if 'Recall' in columns:
68 | row_data['Recall' + suffix] = scorer.recall()
69 | data.append(row_data)
70 |
71 | df = pd.DataFrame(data)
72 | df.set_index('Label', drop=True, inplace=True)
73 |
74 | return dataframe_to_print(df, print_format)
75 |
--------------------------------------------------------------------------------
/PaperOwl/.gitattributes:
--------------------------------------------------------------------------------
1 | *.py eol=lf
2 | *.rst eol=lf
3 | *.md eol=lf
4 | *.mdx eol=lf
--------------------------------------------------------------------------------
/PaperOwl/.gitignore:
--------------------------------------------------------------------------------
1 | evaluate_results*
2 | checkpoints/
3 | benchmark_files/
4 | ureader_images
5 | ureader_json
6 | ureader_images/
7 | ureader_json/
8 | # Initially taken from Github's Python gitignore file
9 | tensorboard/*
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 | .ossutil_*
15 | # C extensions
16 | *.so
17 |
18 | # tests and logs
19 | tests/fixtures/cached_*_text.txt
20 | logs/
21 | lightning_logs/
22 | lang_code_data/
23 |
24 | # Distribution / packaging
25 | .Python
26 | build/
27 | develop-eggs/
28 | dist/
29 | downloads/
30 | eggs/
31 | .eggs/
32 | lib/
33 | lib64/
34 | parts/
35 | sdist/
36 | var/
37 | wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 | MANIFEST
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .nox/
57 | .coverage
58 | .coverage.*
59 | .cache
60 | nosetests.xml
61 | coverage.xml
62 | *.cover
63 | .hypothesis/
64 | .pytest_cache/
65 |
66 | # Translations
67 | *.mo
68 | *.pot
69 |
70 | # Django stuff:
71 | *.log
72 | local_settings.py
73 | db.sqlite3
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | target/
87 |
88 | # Jupyter Notebook
89 | .ipynb_checkpoints
90 |
91 | # IPython
92 | profile_default/
93 | ipython_config.py
94 |
95 | # pyenv
96 | .python-version
97 |
98 | # celery beat schedule file
99 | celerybeat-schedule
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # vscode
132 | .vs
133 | .vscode
134 |
135 | # Pycharm
136 | .idea
137 |
138 | # TF code
139 | tensorflow_code
140 |
141 | # Models
142 | proc_data
143 |
144 | # examples
145 | runs
146 | /runs_old
147 | /wandb
148 | /output
149 | /configs_dev
150 | /scripts_dev
151 | # /examples/runs
152 | # /examples/**/*.args
153 | # /examples/rag/sweep
154 |
155 | # data
156 | /data
157 | serialization_dir
158 |
159 | # emacs
160 | *.*~
161 | debug.env
162 |
163 | # vim
164 | .*.swp
165 |
166 | #ctags
167 | tags
168 |
169 | # pre-commit
170 | .pre-commit*
171 |
172 | # .lock
173 | *.lock
174 |
175 | # DS_Store (MacOS)
176 | .DS_Store
177 |
178 | # ruff
179 | .ruff_cache
180 |
--------------------------------------------------------------------------------
/PaperOwl/assets/Paper-Arxiv-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/PaperOwl/assets/Paper-PDF-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/PaperOwl/assets/data_process.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/assets/data_process.png
--------------------------------------------------------------------------------
/PaperOwl/assets/diagram_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/assets/diagram_distribution.png
--------------------------------------------------------------------------------
/PaperOwl/assets/intro_case.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/assets/intro_case.jpeg
--------------------------------------------------------------------------------
/PaperOwl/assets/paper_category.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/assets/paper_category.png
--------------------------------------------------------------------------------
/PaperOwl/configs/sft/release.yaml:
--------------------------------------------------------------------------------
1 | train_processors: {
2 | sft: {type: 'DocNewMultiScaleSFTProcessor', image_size: 224,
3 | anchors: [[2, 2]]}
4 | }
5 |
6 | valid_processors: {
7 | sft: {type: 'DocNewMultiScaleSFTProcessor', image_size: 224,
8 | anchors: [[2, 2]]}
9 | }
10 |
11 | data_files: [
12 | 'M-Paper/sft/3tasks_train.jsonl',
13 | 'M-Paper/sft/3tasks_val.jsonl'
14 | ]
15 |
16 | patch_pos_embed_type: post
17 |
18 |
--------------------------------------------------------------------------------
/PaperOwl/ds_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "zero_optimization": {
14 | "stage": 1
15 | },
16 | "train_batch_size": "auto",
17 | "train_micro_batch_size_per_gpu": "auto"
18 | }
--------------------------------------------------------------------------------
/PaperOwl/mplug_owl/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from typing import TYPE_CHECKING
15 |
16 | from transformers.utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
17 |
18 |
19 | _import_structure = {
20 | "configuration_mplug_owl": ["MPLUG_OWL_PRETRAINED_CONFIG_ARCHIVE_MAP", "MplugOwlConfig"],
21 | "processing_mplug_owl": ["MplugOwlImageProcessor", "MplugOwlProcessor"],
22 | "tokenization_mplug_owl": ["MplugOwlTokenizer"],
23 | }
24 |
25 | try:
26 | if not is_tokenizers_available():
27 | raise OptionalDependencyNotAvailable()
28 | except OptionalDependencyNotAvailable:
29 | pass
30 |
31 |
32 | try:
33 | if not is_torch_available():
34 | raise OptionalDependencyNotAvailable()
35 | except OptionalDependencyNotAvailable:
36 | pass
37 | else:
38 | _import_structure["modeling_mplug_owl"] = [
39 | "MPLUG_OWL_PRETRAINED_MODEL_ARCHIVE_LIST",
40 | "MplugOwlForConditionalGeneration",
41 | "MplugOwlModel",
42 | ]
43 |
44 |
45 | if TYPE_CHECKING:
46 | from .configuration_mplug_owl import MPLUG_OWL_PRETRAINED_CONFIG_ARCHIVE_MAP, MplugOwlConfig
47 | from .tokenization_mplug_owl import MplugOwlTokenizer
48 |
49 | try:
50 | if not is_tokenizers_available():
51 | raise OptionalDependencyNotAvailable()
52 | except OptionalDependencyNotAvailable:
53 | pass
54 |
55 | try:
56 | if not is_torch_available():
57 | raise OptionalDependencyNotAvailable()
58 | except OptionalDependencyNotAvailable:
59 | pass
60 | else:
61 | from .modeling_mplug_owl import (
62 | MPLUG_OWL_PRETRAINED_MODEL_ARCHIVE_LIST,
63 | MplugOwlForConditionalGeneration,
64 | MplugOwlModel,
65 | MplugOwlPreTrainedModel,
66 | )
67 |
68 |
69 | else:
70 | import sys
71 |
72 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
73 |
74 | from .configuration_mplug_owl import *
75 | from .modeling_mplug_owl import *
76 | from .processing_mplug_owl import *
77 | from .tokenization_mplug_owl import *
78 |
--------------------------------------------------------------------------------
/PaperOwl/mplug_owl/tokenization_mplug_owl.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2022 x-plug and The HuggingFace Inc. team. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Tokenization classes for MplugOwl."""
16 |
17 | from transformers.utils import logging
18 | from transformers.models.llama.tokenization_llama import LlamaTokenizer
19 |
20 |
21 | logger = logging.get_logger(__name__)
22 |
23 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
24 |
25 | PRETRAINED_VOCAB_FILES_MAP = {
26 | "vocab_file": {
27 | "MAGAer13/mplug-owl-llama-7b": "https://huggingface.co/MAGAer13/mplug-owl-llama-7b/resolve/main/vocab.txt",
28 | },
29 | }
30 |
31 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
32 | "MAGAer13/mplug-owl-llama-7b": 1024,
33 | }
34 |
35 |
36 | class MplugOwlTokenizer(LlamaTokenizer):
37 | def __init__(
38 | self,
39 | vocab_file,
40 | unk_token="",
41 | bos_token="",
42 | eos_token="",
43 | pad_token="",
44 | sp_model_kwargs=None,
45 | add_bos_token=False,
46 | add_eos_token=False,
47 | clean_up_tokenization_spaces=False,
48 | **kwargs,
49 | ):
50 | super().__init__(
51 | vocab_file,
52 | unk_token,
53 | bos_token,
54 | eos_token,
55 | pad_token,
56 | sp_model_kwargs,
57 | add_bos_token,
58 | add_eos_token,
59 | clean_up_tokenization_spaces,
60 | **kwargs,
61 | )
62 | self.eod_id = self.eos_token_id
63 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/pipeline/__init__.py
--------------------------------------------------------------------------------
/PaperOwl/pipeline/data_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .processors.builder import build_processors
2 | from .xgpt3_dataset import MultiModalDataset
3 |
4 | def train_valid_test_datasets_provider(data_path, config, tokenizer, seq_length=1024,image_root='ureader_images'):
5 | """Build train and valid datasets."""
6 | print('> building train and validation datasets for mPLUG-Owl ...')
7 | train_ds, valid_ds = build_train_valid_test_datasets(
8 | input_file=data_path,
9 | tokenizer=tokenizer,
10 | max_length=seq_length,
11 | config=config,
12 | image_root=image_root)
13 | print("> finished creating mPLUG-Owl datasets ...")
14 |
15 | return train_ds, valid_ds
16 |
17 | def build_train_valid_test_datasets(input_file, tokenizer, max_length=80, config=None,image_root='ureader_images'):
18 | train_processors = build_processors(config['train_processors'])
19 | valid_processors = build_processors(config['valid_processors'])
20 | if isinstance(input_file, dict):
21 | train_ds = MultiModalDataset(input_file['train'][0], tokenizer, train_processors, max_length, image_root=image_root)
22 | valid_ds = {name: MultiModalDataset(ds, tokenizer, valid_processors, max_length) for name,ds in input_file['valid'].items()}
23 | test_ds = None
24 |
25 | else:
26 |         assert len(input_file) == 2 # If you have more than 2 files, modify the code here or merge them into train and dev
27 | train_ds = MultiModalDataset(input_file[0], tokenizer, train_processors, max_length)
28 | valid_ds = MultiModalDataset(input_file[1], tokenizer, valid_processors, max_length)
29 | test_ds = None
30 | return (train_ds, valid_ds)
31 |
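A sketch of how the provider above is typically driven by a config like configs/sft/release.yaml shown earlier in this repository (PyYAML and the tokenizer path are assumptions; run from the PaperOwl root):

import yaml
from transformers import AutoTokenizer

from pipeline.data_utils import train_valid_test_datasets_provider

config = yaml.safe_load(open('configs/sft/release.yaml'))
tokenizer = AutoTokenizer.from_pretrained('path/to/base-llm')  # placeholder

train_ds, valid_ds = train_valid_test_datasets_provider(
    data_path=config['data_files'],   # two jsonl files: train and validation
    config=config,                    # supplies train_processors / valid_processors
    tokenizer=tokenizer,
    seq_length=1024,
    image_root='ureader_images',
)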
--------------------------------------------------------------------------------
/PaperOwl/pipeline/data_utils/processors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Alibaba. All rights reserved.
2 | from .builder import PROCESSORS, build_processors
3 | from .default_processor import DefaultProcessor
4 | from .caption_processor import CaptionProcessor
5 | from .doc_processor import DocPretrainProcessor, DocSFTProcessor
6 | __all__ = [
7 | 'PROCESSORS', 'build_processors',
8 | 'DefaultProcessor', 'CaptionProcessor',
9 | 'DocPretrainProcessor', 'DocSFTProcessor'
10 | ]
--------------------------------------------------------------------------------
/PaperOwl/pipeline/data_utils/processors/builder.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from icecream import ic
4 | from pipeline.data_utils.registry import Registry, build_from_cfg
5 | # from .data_utils.registry import Registry, build_from_cfg
6 |
7 | PROCESSORS = Registry('processors')
8 |
9 | def build_processors(processors_cfg):
10 | processors = dict()
11 | for task, processor in processors_cfg.items():
12 | processors[task] = build_from_cfg(processor, PROCESSORS)
13 | ic(type(processors[task]))
14 | return processors
15 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/data_utils/processors/caption_processor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision import transforms
3 | from PIL import Image
4 | import random
5 |
6 | from pipeline.data_utils.randaugment import RandomAugment
7 | from .builder import PROCESSORS
8 |
9 |
10 | @PROCESSORS.register_module()
11 | class CaptionProcessor:
12 | def __init__(self, image_size=224, min_scale = 0.5, randaug=False):
13 | self.image_size = image_size
14 | self.min_scale = min_scale
15 |
16 | if randaug:
17 | self.image_transform = transforms.Compose([
18 | transforms.RandomResizedCrop(image_size,scale=(min_scale, 1.0), interpolation=Image.BICUBIC),
19 | transforms.RandomHorizontalFlip(),
20 | RandomAugment(2,7,isPIL=True,augs=['Identity','AutoContrast','Equalize','Brightness','Sharpness',
21 | 'ShearX', 'ShearY', 'TranslateX', 'TranslateY', 'Rotate']),
22 | transforms.ToTensor(),
23 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
24 | ])
25 | else:
26 | self.image_transform = transforms.Compose([
27 | transforms.RandomResizedCrop(image_size,scale=(min_scale, 1.0), interpolation=Image.BICUBIC),
28 | transforms.RandomHorizontalFlip(),
29 | transforms.ToTensor(),
30 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
31 | ])
32 | self.text_transform = None
33 |
34 | def __call__(self, image, text):
35 | assert image or text
36 |
37 | if image:
38 | image_input = self.image_transform(image)
39 | else:
40 | image_input = None
41 |
42 | if text:
43 | if isinstance(text["prompt"], list):
44 | prompt = random.choice(text["prompt"])
45 | else:
46 | prompt = text["prompt"]
47 | text_input = dict(
48 | prompt=prompt,
49 | completion=text["text"],
50 | )
51 | else:
52 | text_input = None
53 | return image_input, text_input
--------------------------------------------------------------------------------
/PaperOwl/pipeline/data_utils/processors/default_processor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision import transforms
3 | from PIL import Image
4 | import random
5 |
6 | from pipeline.data_utils.randaugment import RandomAugment
7 | from .builder import PROCESSORS
8 |
9 |
10 | @PROCESSORS.register_module()
11 | class DefaultProcessor:
12 | def __init__(self, image_size=224):
13 | self.image_size = image_size
14 |
15 | self.image_transform = transforms.Compose([
16 | transforms.Resize((image_size, image_size),interpolation=Image.BICUBIC),
17 | transforms.ToTensor(),
18 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
19 | ])
20 |
21 | self.text_transform = None
22 |
23 | def __call__(self, image, text):
24 | assert image or text
25 |
26 | if image:
27 | image_input = self.image_transform(image)
28 | else:
29 | image_input = None
30 |
31 | if text:
32 | if isinstance(text["prompt"], list):
33 | prompt = random.choice(text["prompt"])
34 | else:
35 | prompt = text["prompt"]
36 | text_input = dict(
37 | prompt=prompt,
38 | completion=text["text"],
39 | )
40 | else:
41 | text_input = None
42 | return image_input, text_input
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/pipeline/eval_utils/__init__.py
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | from .__main__ import cli_main
2 | from .due_evaluator import DueEvaluator
3 |
4 | __all__ = ['DueEvaluator', 'cli_main']
5 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import sys
6 | from typing import Optional, Set
7 | import json
8 |
9 | from .due_evaluator import DueEvaluator
10 | from .utils import property_scores_to_string
11 |
12 |
13 | def parse_args():
14 | """Parse CLI arguments.
15 |
16 | Returns:
17 | namespace: namespace with parsed variables.
18 |
19 | """
20 | parser = argparse.ArgumentParser('Document Understanding Evaluator')
21 | parser.add_argument(
22 | '--out-files',
23 | '-o',
24 | type=argparse.FileType('r', encoding='utf-8'),
25 | required=True,
26 | nargs='+',
27 | help='Out file to evaluate',
28 | )
29 | parser.add_argument(
30 | '--reference', '-r', type=argparse.FileType('r', encoding='utf-8'), required=True, help='Reference file',
31 | )
32 | parser.add_argument('--metric', '-m', type=str, default='F1', choices=['F1', 'MEAN-F1', 'ANLS', 'WTQ', 'GROUP-ANLS'])
33 | parser.add_argument(
34 | '--return-score',
35 | default='F1',
36 | choices=['F1', 'mean-F1', 'ANLS', 'mean-Precision', 'mean-Recall', 'WTQ'],
37 | help='Return WR-like mean-F1 score',
38 | )
39 |     parser.add_argument('--line-by-line', action='store_true', default=False, help='Return per-example results')
40 | parser.add_argument(
41 | '--columns', type=str, nargs='+', default=['Precision', 'Recall', 'F1'], help='Columns',
42 | )
43 | parser.add_argument(
44 | '--print-format',
45 | default='text',
46 | type=str,
47 | choices=['text', 'latex', 'json'],
48 | help='Print feature table in the given format',
49 | )
50 |     parser.add_argument('--properties', nargs='+', type=str, help='Property set to be limited to')
51 | parser.add_argument(
52 |         '--ignore-case', '-i', action='store_true', default=False, help='Ignore case when comparing values',
53 | )
54 | return parser.parse_args()
55 |
56 |
57 | def cli_main(args: argparse.Namespace):
58 | """CLI main.
59 |
60 | Args:
61 | args: cli arguments
62 | """
63 | reference = [json.loads(line) for line in args.reference]
64 |
65 | evaluators = []
66 | for out_file in args.out_files:
67 | predictions = [json.loads(line) for line in out_file]
68 |
69 | property_set: Optional[Set[str]]
70 | if args.properties:
71 | property_set = args.properties
72 | else:
73 | property_set = None
74 |
75 | evaluators.append(
76 | DueEvaluator(reference, predictions, property_set, args.ignore_case, out_file.name, args.metric)
77 | )
78 |
79 | prop_str = property_scores_to_string(evaluators, args.print_format, args.columns)
80 | if args.print_format != 'json':
81 | print(prop_str, file=sys.stderr)
82 |
83 | if args.line_by_line:
84 | for idx, score in enumerate(evaluators[0].line_by_line()):
85 | print(f'{idx}: {score}', file=sys.stderr)
86 | return prop_str
87 |
88 |
89 | def main() -> None:
90 | """Main."""
91 | args = parse_args()
92 | cli_main(args)
93 |
94 |
95 | if __name__ == '__main__':
96 | main()
97 |
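Besides the CLI above, the same evaluation can be driven programmatically, mirroring cli_main (file names are placeholders):

import json

from pipeline.eval_utils.due_evaluator import DueEvaluator
from pipeline.eval_utils.due_evaluator.utils import property_scores_to_string

reference = [json.loads(line) for line in open('reference.jsonl')]
predictions = [json.loads(line) for line in open('predictions.jsonl')]

evaluator = DueEvaluator(reference, predictions, None, False, 'predictions.jsonl', 'ANLS')
print(property_scores_to_string([evaluator], 'text', ['Precision', 'Recall', 'F1']))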
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/__version__.py:
--------------------------------------------------------------------------------
1 | """Version specification."""
2 |
3 | VERSION = (0, 0, 8)
4 | __version__ = '.'.join(map(str, VERSION))
5 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/pipeline/eval_utils/due_evaluator/py.typed
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/scorers/__init__.py:
--------------------------------------------------------------------------------
1 | from .anls_scorer import AnlsScorer
2 | from .base_scorer import BaseScorer
3 | from .fscorer import FScorer
4 | from .mean_fscorer import MeanFScorer
5 | from .wtq_scorer import WtqScorer
6 | from .group_anls import GroupAnlsScorer
7 | from .geval_scorer import GevalScorer
8 |
9 | __all__ = ['AnlsScorer', 'BaseScorer', 'FScorer', 'MeanFScorer', 'WtqScorer', 'GevalScorer', 'GroupAnlsScorer']
10 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/scorers/accuracy_scorer.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List
3 | from operator import itemgetter
4 |
5 | from .base_scorer import BaseScorer
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | class AccuracyScorer(BaseScorer):
11 | """Accuracy Scorer."""
12 |
13 | def __init__(self, threshold: float = 0.5):
14 | self.__scores: List[float] = []
15 | self.threshold = threshold
16 |
17 | @property
18 | def scores(self):
19 | return self.__scores
20 |
21 | def check_denotation(self, out: list, ref: list) -> bool:
22 | return out == ref
23 |
24 | def add(self, out_items: List[dict], ref_items: List[dict]):
25 | """Add more items for computing corpus level scores.
26 |
27 | Args:
28 | out_items: outs from a single document (line)
29 | ref_items: reference of the evaluated document (line)
30 |
31 | """
32 | out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
33 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
34 | assert [a['key'] for a in out_ann] == [a['key'] for a in ref_ann]
35 |
36 | for out, ref in zip(out_ann, ref_ann):
37 | o_values = [v['value'] for v in out['values']]
38 | r_values = [v['value'] for v in ref['values']]
39 | score = int(self.check_denotation(o_values, r_values))
40 | self.__scores.append(score)
41 |
42 | def score(self) -> float:
43 | if self.__scores:
44 | return sum(self.__scores) / len(self.__scores)
45 | return 0.0
46 |
47 | @classmethod
48 | def support_feature_scores(cls) -> bool:
49 | return False
50 |
51 | @classmethod
52 | def metric_name(cls) -> str:
53 | return "Accuracy"
54 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/scorers/anls_scorer.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List
3 | from operator import itemgetter
4 |
5 | import textdistance
6 |
7 | from .base_scorer import BaseScorer
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | class AnlsScorer(BaseScorer):
13 |     """ANLS Scorer."""
14 |
15 | def __init__(self, threshold: float = 0.5):
16 | self.__scores: List[float] = []
17 | self.threshold = threshold
18 |
19 | @property
20 | def scores(self):
21 | return self.__scores
22 |
23 | def add(self, out_items: List[dict], ref_items: List[dict]):
24 | """Add more items for computing corpus level scores.
25 |
26 | Args:
27 | out_items: outs from a single document (line)
28 | ref_items: reference of the evaluated document (line)
29 |
30 | """
31 | out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
32 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
33 | assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann]
34 |
35 | """try:
36 | # assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann]
37 | out_keys = [a['key'][:100] for a in out_ann]
38 | ref_keys = [a['key'][:100] for a in ref_ann]
39 | # assert out_keys == ref_keys
40 | for i in range(len(out_keys)):
41 | try:
42 | assert out_keys[i] == ref_keys[i]
43 | except AssertionError as e:
44 | print(out_keys[i])
45 | print(ref_keys[i])
46 | print('==============')
47 | # exit(0)
48 |
49 | except AssertionError as e:
50 | print('key of pred and gt unmatched:')
51 | # print('pred:', out_keys)
52 | # print('gt:', ref_keys)
53 | exit(0)"""
54 |
55 | for out, ref in zip(out_ann, ref_ann):
56 | assert len(out['values']) == 1
57 | val = out['values'][0]['value']
58 | possible_vals = ref['values'][0]['value_variants']
59 | best_score = max([textdistance.levenshtein.normalized_similarity(val, pos)
60 | for pos in possible_vals])
61 | if 1 - self.threshold >= best_score:
62 | best_score = 0.0
63 | self.__scores.append(best_score)
64 |
65 | def score(self) -> float:
66 | if self.__scores:
67 | return sum(self.__scores) / len(self.__scores)
68 | return 0.0
69 |
70 | @classmethod
71 | def support_feature_scores(cls) -> bool:
72 | return False
73 |
74 | @classmethod
75 | def metric_name(cls) -> str:
76 | return "ANLS"
77 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/scorers/base_scorer.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import List
3 |
4 |
5 | class BaseScorer(abc.ABC):
6 | """Abstract class for scorers."""
7 |
8 | @abc.abstractmethod
9 | def add(self, out_items: List[dict], ref_items: List[dict]):
10 | pass
11 |
12 | @abc.abstractmethod
13 | def score(self):
14 | pass
15 |
16 | @abc.abstractclassmethod
17 | def support_feature_scores(cls) -> bool:
18 | pass
19 |
20 | @abc.abstractclassmethod
21 | def metric_name(cls) -> str:
22 | pass
23 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/scorers/geval_scorer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | import tempfile
3 | from collections import defaultdict
4 | import os
5 |
6 | from .fscorer import FScorer
7 | from .base_scorer import BaseScorer
8 |
9 |
10 | GEVAL_BINARY = os.getenv('GEVAL_BINARY', '/data/shared/bin/geval')
11 | GEVAL_METRIC = os.getenv('GEVAL_METRIC', 'MultiLabel-F1:cN')
12 |
13 |
14 | class GevalScorer(BaseScorer):
15 | def __init__(self):
16 | self.__ref = tempfile.NamedTemporaryFile('w+t')
17 | self.__out = tempfile.NamedTemporaryFile('w+t')
18 | self.__ref_data = defaultdict(set)
19 | self.__out_data = defaultdict(set)
20 |
21 | @staticmethod
22 | def add_to_geval_data(data, line):
23 | name = line['name']
24 | for annotation in line['annotations']:
25 | for idx, val in enumerate(annotation['values'], 1):
26 | for child in val['children']:
27 | new_name = child['key'] + '__' + str(idx) if '__' in child['key'] else child['key']
28 | if child['values'] and child['values'] != ['']:
29 | new_value = '|'.join([v['value'].replace(' ', '_') for v in child['values']])
30 | data[name].add(f'{new_name}={new_value}')
31 |
32 | def save_geval_files(self):
33 | for name in sorted(self.__ref_data.keys()):
34 | self.__ref.write(' '.join(self.__ref_data[name]) + '\n')
35 | self.__out.write(' '.join(self.__out_data[name]) + '\n')
36 |
37 | def add(self, out_items: List[str], ref_items: List[str]):
38 | self.add_to_geval_data(self.__out_data, out_items)
39 | self.add_to_geval_data(self.__ref_data, ref_items)
40 |
41 | def support_feature_scores(cls) -> bool:
42 | return False
43 |
44 | def metric_name(cls) -> str:
45 | return "GEVAL"
46 |
47 | def run_geval(self):
48 | self.__ref.flush()
49 | self.__out.flush()
50 | try:
51 | return float(os.popen(f'{GEVAL_BINARY} -o {self.__out.name} -e {self.__ref.name} --metric {GEVAL_METRIC}').read())
52 | except:
53 | return -1
54 |
55 | def score(self) -> float:
56 | self.save_geval_files()
57 | return self.run_geval()
58 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/scorers/mean_fscorer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from .fscorer import FScorer
4 | from .base_scorer import BaseScorer
5 |
6 |
7 | class MeanFScorer(BaseScorer):
8 | def __init__(self):
9 | self.__scores: List[float] = []
10 |
11 | def add(self, out_items: List[str], ref_items: List[str]):
12 | fscorer = FScorer()
13 | fscorer.add(out_items, ref_items)
14 | self.__scores.append(fscorer.f_score())
15 |
16 | def support_feature_scores(cls) -> bool:
17 | return False
18 |
19 | def metric_name(cls) -> str:
20 | return "MEAN-F1"
21 |
22 | def score(self) -> float:
23 | if self.__scores:
24 | return sum(self.__scores) / len(self.__scores)
25 | return 0.0
26 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/due_evaluator/utils.py:
--------------------------------------------------------------------------------
1 | from .scorers.fscorer import FScorer
2 | from typing import Dict, List, Optional, Sequence, Union
3 |
4 | import pandas as pd
5 |
6 | from .due_evaluator import DueEvaluator
7 |
8 |
9 | def dataframe_to_print(df: pd.DataFrame, print_format: Optional[str] = 'text') -> str:
10 | """Export dataframe to json or plain text.
11 |
12 | Args:
13 | df (pd.DataFrame): data
14 | print_format (str, optional): Print format. Defaults to 'text'.
15 |
16 | Raises:
17 | ValueError: unknown print_format
18 |
19 | Returns:
20 | str: printed version of dataframe
21 |
22 | """
23 | out: str
24 | if print_format == 'latex':
25 | out = df.reset_index().to_latex(index=False)
26 | elif print_format == 'text':
27 | out = df.reset_index().to_string(index=False)
28 | elif print_format == 'json':
29 | out = df.to_json(orient='index')
30 | else:
31 | raise ValueError()
32 | return out
33 |
34 |
35 | def property_scores_to_string(
36 | dues: List[DueEvaluator], print_format: str = 'text', columns: Sequence[str] = ('Precision', 'Recall', 'F-1'),
37 | ) -> str:
38 | """Print out scores per property.
39 |
40 | Args:
41 | dues: List of DueEvaluators
42 | print_format: output format: text or latex
43 | columns: a list of metrics to print
44 |
45 | Returns:
46 | str: string table with feature scores.
47 |
48 | """
49 | data = []
50 | for property_name in sorted(dues[0].property_scorers.keys()) + ['ALL']:
51 | row_data: Dict[str, Union[str, float]] = {}
52 | row_data['Label'] = property_name
53 | for due in dues:
54 | if len(dues) == 1:
55 | suffix = ''
56 | else:
57 | suffix = f' ({due.path})'
58 | if property_name == 'ALL':
59 | scorer = due.general_scorer
60 | else:
61 | scorer = due.property_scorers[property_name]
62 |
63 | row_data[scorer.metric_name() + suffix] = scorer.score()
64 | if isinstance(scorer, FScorer):
65 | if 'Precision' in columns:
66 | row_data['Precision' + suffix] = scorer.precision()
67 | if 'Recall' in columns:
68 | row_data['Recall' + suffix] = scorer.recall()
69 | data.append(row_data)
70 |
71 | df = pd.DataFrame(data)
72 | df.set_index('Label', drop=True, inplace=True)
73 |
74 | return dataframe_to_print(df, print_format)
75 |
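A quick illustration of dataframe_to_print() with a hand-built frame; the label and column names here are hypothetical but mirror the kind of table property_scores_to_string() assembles.

import pandas as pd

df = pd.DataFrame([
    {'Label': 'advertiser', 'F-1': 0.91, 'Precision': 0.93, 'Recall': 0.89},
    {'Label': 'ALL',        'F-1': 0.87, 'Precision': 0.90, 'Recall': 0.84},
]).set_index('Label')

print(dataframe_to_print(df, 'text'))   # fixed-width text table
print(dataframe_to_print(df, 'json'))   # {"advertiser": {...}, "ALL": {...}}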
--------------------------------------------------------------------------------
/PaperOwl/pipeline/eval_utils/run_evaluation.py:
--------------------------------------------------------------------------------
1 | from .tools import llm_answer_eval, postprocess_llm_vqa, textcaps_textvqa_eval
2 |
3 | if __name__ == '__main__':
4 |
5 | llm_answer_eval(metric_names=['RelaxedAccuracy'], result_path='evaluate_results/test_ChartQA.jsonl', save_each_eval=True)
6 | llm_answer_eval(metric_names=['ExactAccuracy'], result_path='evaluate_results/test_TabFact.jsonl', save_each_eval=True)
7 | llm_answer_eval(metric_names=['BLEU1', 'BLEU2', 'BLEU3', 'BLEU4', 'Meteor', 'RougeL', 'CIDEr'], result_path='evaluate_results/test_VisualMRC.jsonl', save_each_eval=True)
8 |
9 |
10 | postprocess_llm_vqa(dataset_name='DeepFormQA', split='test',
11 | llm_pred_path='./evaluate_results/test_DeepForm.jsonl',
12 | eval_flag=True)
13 | postprocess_llm_vqa(dataset_name='DocVQA', split='test',
14 | llm_pred_path='./evaluate_results/test_DocVQA.jsonl',
15 | eval_flag=True)
16 | postprocess_llm_vqa(dataset_name='InfographicsVQA', split='test',
17 | llm_pred_path='evaluate_results/test_InfographicsVQA.jsonl',
18 | eval_flag=True)
19 | postprocess_llm_vqa(dataset_name='KleisterCharityQA', split='test',
20 | llm_pred_path='evaluate_results/test_KleisterCharity.jsonl',
21 | eval_flag=True)
22 | postprocess_llm_vqa(dataset_name='WikiTableQuestions', split='test',
23 | llm_pred_path='evaluate_results/test_WikiTableQuestions.jsonl',
24 | eval_flag=True)
25 |
26 | # need to submit evaluate_results/***_official_eval.json
27 | textcaps_textvqa_eval(result_path='evaluate_results/test_TextCaps.jsonl', dataset='TextCaps', split='test')
28 | textcaps_textvqa_eval(result_path='evaluate_results/test_TextVQA.jsonl', dataset='TextVQA', split='test')
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/PaperOwl/pipeline/interface.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import requests
4 | from PIL import Image
5 | from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
6 | from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
7 | from mplug_owl.processing_mplug_owl import MplugOwlImageProcessor, MplugOwlProcessor
8 | from sconf import Config
9 | from pipeline.data_utils.processors.builder import build_processors
10 |
11 |
12 | def get_model(pretrained_ckpt, use_bf16=False):
13 | """Model Provider with tokenizer and processor.
14 |
15 | Args:
16 | pretrained_ckpt (string): The path to pre-trained checkpoint.
17 | use_bf16 (bool, optional): Whether to use bfloat16 to load the model. Defaults to False.
18 |
19 | Returns:
20 | model: MplugOwl Model
21 | tokenizer: MplugOwl text tokenizer
22 | processor: MplugOwl processor (including text and image)
23 | """
24 | model = MplugOwlForConditionalGeneration.from_pretrained(
25 | pretrained_ckpt,
26 | torch_dtype=torch.bfloat16 if use_bf16 else torch.half,
27 | )
28 | config = Config('configs/sft/release.yaml')
29 | image_processor = build_processors(config['valid_processors'])['sft']
30 | tokenizer = MplugOwlTokenizer.from_pretrained(pretrained_ckpt)
31 | processor = MplugOwlProcessor(image_processor, tokenizer)
32 | return model, tokenizer, processor
33 |
34 |
35 | def do_generate(prompts, image_list, model, tokenizer, processor, use_bf16=False, **generate_kwargs):
36 | """The interface for generation
37 |
38 | Args:
39 | prompts (List[str]): The prompt text
40 | image_list (List[str]): Paths of images
41 | model (MplugOwlForConditionalGeneration): MplugOwlForConditionalGeneration
42 | tokenizer (MplugOwlTokenizer): MplugOwlTokenizer
43 | processor (MplugOwlProcessor): MplugOwlProcessor
44 | use_bf16 (bool, optional): Whether to use bfloat16. Defaults to False.
45 |
46 | Returns:
47 | sentence (str): Generated sentence.
48 | """
49 | if image_list:
50 | images = [Image.open(_) for _ in image_list]
51 | else:
52 | images = None
53 | inputs = processor(text=prompts, images=images, return_tensors='pt')
54 | inputs = {k: v.bfloat16() if v.dtype == torch.float else v for k, v in inputs.items()}
55 | inputs = {k: v.to(model.device) for k, v in inputs.items()}
56 | with torch.no_grad():
57 | res = model.generate(**inputs, **generate_kwargs)
58 | sentence = tokenizer.decode(res.tolist()[0], skip_special_tokens=True)
59 | return sentence
60 |
61 |
62 | if __name__ == '__main__':
63 | pass
64 |
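A minimal usage sketch for the two helpers above. The checkpoint path matches the one used by the training scripts, the image path is a placeholder, the prompt follows mPLUG-Owl's conversation template, and the generation kwargs are ordinary Hugging Face options.

model, tokenizer, processor = get_model('checkpoints/mplug-owl-llama-7b', use_bf16=True)
model.cuda().eval()

prompt = (
    "The following is a conversation between a curious human and AI assistant.\n"
    "Human: <image>\n"
    "Human: What is the main finding of this figure?\n"
    "AI: "
)
answer = do_generate([prompt], ['path/to/figure.png'], model, tokenizer, processor,
                     use_bf16=True, max_new_tokens=256, do_sample=False)
print(answer)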
--------------------------------------------------------------------------------
/PaperOwl/pipeline/trainer.py:
--------------------------------------------------------------------------------
1 | import torch.distributed as dist
2 | import argparse
3 | from functools import partial
4 |
5 | import torch
6 |
7 | from torch.utils.data import DataLoader, Dataset
8 | from torch.utils.data.distributed import DistributedSampler
9 |
10 | from transformers import Trainer
11 |
12 | from pipeline.utils import batchify
13 |
14 |
15 | class CustomTrainer(Trainer):
16 |
17 | def get_train_dataloader(self) -> DataLoader:
18 | dataset = self.train_dataset
19 | sampler = DistributedSampler(dataset)
20 | return torch.utils.data.DataLoader(
21 | dataset, batch_size=self._train_batch_size,
22 | sampler=sampler,
23 | num_workers=self.args.dataloader_num_workers,
24 | drop_last=True,
25 | pin_memory=False,
26 | collate_fn=batchify)
27 |
28 |
29 | def get_eval_dataloader(self, eval_dataset: Dataset | None = None) -> DataLoader:
30 | dataset = self.eval_dataset
31 | sampler = DistributedSampler(dataset, shuffle=False)
32 | return torch.utils.data.DataLoader(
33 | dataset, batch_size=self._train_batch_size,
34 | sampler=sampler,
35 | num_workers=self.args.dataloader_num_workers,
36 | drop_last=True,
37 | pin_memory=False,
38 | collate_fn=batchify)
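
CustomTrainer is driven like a stock transformers.Trainer; the only difference is that both dataloaders use a DistributedSampler and the pipeline's batchify collator, so it expects an initialized torch.distributed process group. A sketch, assuming model, train_dataset and eval_dataset are built elsewhere in the pipeline:

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir='./output/paperowl',
    per_device_train_batch_size=4,
    dataloader_num_workers=16,
    bf16=True,
)
trainer = CustomTrainer(model=model, args=training_args,
                        train_dataset=train_dataset, eval_dataset=eval_dataset)
trainer.train()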
--------------------------------------------------------------------------------
/PaperOwl/scripts/train_it.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For A100 80G
3 | DIR=`pwd`
4 | export PYTHONPATH=$DIR
5 | DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`
6 |
7 | if [ $MASTER_ADDR ];then
8 | echo $MASTER_ADDR
9 | echo $MASTER_PORT
10 | echo $WORLD_SIZE
11 | echo $RANK
12 | else
13 | MASTER_ADDR=127.0.0.1
14 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
15 | WORLD_SIZE=1
16 | RANK=0
17 | fi
18 |
19 | DISTRIBUTED_ARGS="--nproc_per_node 1 \
20 | --nnodes ${WORLD_SIZE} \
21 | --node_rank ${RANK} \
22 | --master_addr ${MASTER_ADDR} \
23 | --master_port ${MASTER_PORT}"
24 |
25 | EXP_NAME=paperowl
26 |
27 | max_length=2304
28 | micro_batch_size=4
29 | global_batch_size=256
30 | gradient_accumulation_steps=1
31 |
32 | SAVE_NAME=${ureader}_${max_length}_${global_batch_size}
33 |
34 | SAVE_PATH="./output/${EXP_NAME}/"
35 | TENSORBOARD_PATH="./tensorboard/sft/${SAVE_NAME}/"
36 |
37 |
38 | train_epochs=10
39 | train_iters=29000
40 |
41 | lr_warmup_iters=36
42 |
43 | eval_iter=290
44 | eval_interval=1160
45 | save_interval=1160
46 |
47 | mkdir -p ${SAVE_PATH}
48 | mkdir -p ${TENSORBOARD_PATH}
49 |
50 | options=" \
51 | --pretrained-ckpt checkpoints/mplug-owl-llama-7b \
52 | --seq-length ${max_length} \
53 | --micro-batch-size ${micro_batch_size} \
54 | --global-batch-size ${global_batch_size} \
55 | --num-training-steps ${train_iters} \
56 | --train-epochs ${train_epochs} \
57 | --num-warmup-steps ${lr_warmup_iters} \
58 | --gradient-accumulation-steps ${gradient_accumulation_steps} \
59 | --lr 1e-4 \
60 | --min-lr 1e-6 \
61 | --eval-iters ${eval_iter} \
62 | --save-interval ${save_interval} \
63 | --save-path ${SAVE_PATH} \
64 | --tensorboard-dir ${TENSORBOARD_PATH} \
65 | --clip-grad 1.0 \
66 | --weight-decay 0.0001 \
67 | --adam-beta1 0.9 \
68 | --adam-beta2 0.999 \
69 | --num-workers 16 \
70 | --use-lora \
71 | --gradient-checkpointing \
72 | --bf16"
73 |
74 | multimodal_options=" \
75 | --mm-config configs/sft/release.yaml
76 | "
77 |
78 | python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log
--------------------------------------------------------------------------------
/PaperOwl/scripts/train_it_v100.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For V100 32G
3 | DIR=`pwd`
4 | export PYTHONPATH=$DIR
5 | DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`
6 |
7 | if [ $MASTER_ADDR ];then
8 | echo $MASTER_ADDR
9 | echo $MASTER_PORT
10 | echo $WORLD_SIZE
11 | echo $RANK
12 | else
13 | MASTER_ADDR=127.0.0.1
14 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
15 | WORLD_SIZE=1
16 | RANK=0
17 | fi
18 |
19 | DISTRIBUTED_ARGS="--nproc_per_node 1 \
20 | --nnodes ${WORLD_SIZE} \
21 | --node_rank ${RANK} \
22 | --master_addr ${MASTER_ADDR} \
23 | --master_port ${MASTER_PORT}"
24 |
25 | EXP_NAME=paperowl
26 |
27 | max_length=2304
28 | micro_batch_size=1
29 | global_batch_size=256
30 | gradient_accumulation_steps=1
31 |
32 | SAVE_NAME=${ureader}_${max_length}_${global_batch_size}
33 |
34 | SAVE_PATH="./output/${EXP_NAME}/"
35 | TENSORBOARD_PATH="./tensorboard/sft/${SAVE_NAME}/"
36 |
37 |
38 | train_epochs=10
39 | train_iters=29000
40 |
41 | lr_warmup_iters=36
42 |
43 | eval_iter=290
44 | eval_interval=1160
45 | save_interval=1160
46 |
47 | mkdir -p ${SAVE_PATH}
48 | mkdir -p ${TENSORBOARD_PATH}
49 |
50 | options=" \
51 | --pretrained-ckpt checkpoints/mplug-owl-llama-7b \
52 | --seq-length ${max_length} \
53 | --micro-batch-size ${micro_batch_size} \
54 | --global-batch-size ${global_batch_size} \
55 | --num-training-steps ${train_iters} \
56 | --train-epochs ${train_epochs} \
57 | --num-warmup-steps ${lr_warmup_iters} \
58 | --gradient-accumulation-steps ${gradient_accumulation_steps} \
59 | --lr 1e-4 \
60 | --min-lr 1e-6 \
61 | --eval-iters ${eval_iter} \
62 | --save-interval ${save_interval} \
63 | --save-path ${SAVE_PATH} \
64 | --tensorboard-dir ${TENSORBOARD_PATH} \
65 | --clip-grad 1.0 \
66 | --weight-decay 0.0001 \
67 | --adam-beta1 0.9 \
68 | --adam-beta2 0.999 \
69 | --num-workers 16 \
70 | --use-lora \
71 | --gradient-checkpointing \
72 | --fp16 \
73 | --deepspeed ds_config.json"
74 |
75 | multimodal_options=" \
76 | --mm-config configs/sft/release.yaml
77 | "
78 |
79 | python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log
--------------------------------------------------------------------------------
/PaperOwl/serve/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/serve/__init__.py
--------------------------------------------------------------------------------
/PaperOwl/serve/gradio_css.py:
--------------------------------------------------------------------------------
1 | code_highlight_css = (
2 | """
3 | #chatbot .hll { background-color: #ffffcc }
4 | #chatbot .c { color: #408080; font-style: italic }
5 | #chatbot .err { border: 1px solid #FF0000 }
6 | #chatbot .k { color: #008000; font-weight: bold }
7 | #chatbot .o { color: #666666 }
8 | #chatbot .ch { color: #408080; font-style: italic }
9 | #chatbot .cm { color: #408080; font-style: italic }
10 | #chatbot .cp { color: #BC7A00 }
11 | #chatbot .cpf { color: #408080; font-style: italic }
12 | #chatbot .c1 { color: #408080; font-style: italic }
13 | #chatbot .cs { color: #408080; font-style: italic }
14 | #chatbot .gd { color: #A00000 }
15 | #chatbot .ge { font-style: italic }
16 | #chatbot .gr { color: #FF0000 }
17 | #chatbot .gh { color: #000080; font-weight: bold }
18 | #chatbot .gi { color: #00A000 }
19 | #chatbot .go { color: #888888 }
20 | #chatbot .gp { color: #000080; font-weight: bold }
21 | #chatbot .gs { font-weight: bold }
22 | #chatbot .gu { color: #800080; font-weight: bold }
23 | #chatbot .gt { color: #0044DD }
24 | #chatbot .kc { color: #008000; font-weight: bold }
25 | #chatbot .kd { color: #008000; font-weight: bold }
26 | #chatbot .kn { color: #008000; font-weight: bold }
27 | #chatbot .kp { color: #008000 }
28 | #chatbot .kr { color: #008000; font-weight: bold }
29 | #chatbot .kt { color: #B00040 }
30 | #chatbot .m { color: #666666 }
31 | #chatbot .s { color: #BA2121 }
32 | #chatbot .na { color: #7D9029 }
33 | #chatbot .nb { color: #008000 }
34 | #chatbot .nc { color: #0000FF; font-weight: bold }
35 | #chatbot .no { color: #880000 }
36 | #chatbot .nd { color: #AA22FF }
37 | #chatbot .ni { color: #999999; font-weight: bold }
38 | #chatbot .ne { color: #D2413A; font-weight: bold }
39 | #chatbot .nf { color: #0000FF }
40 | #chatbot .nl { color: #A0A000 }
41 | #chatbot .nn { color: #0000FF; font-weight: bold }
42 | #chatbot .nt { color: #008000; font-weight: bold }
43 | #chatbot .nv { color: #19177C }
44 | #chatbot .ow { color: #AA22FF; font-weight: bold }
45 | #chatbot .w { color: #bbbbbb }
46 | #chatbot .mb { color: #666666 }
47 | #chatbot .mf { color: #666666 }
48 | #chatbot .mh { color: #666666 }
49 | #chatbot .mi { color: #666666 }
50 | #chatbot .mo { color: #666666 }
51 | #chatbot .sa { color: #BA2121 }
52 | #chatbot .sb { color: #BA2121 }
53 | #chatbot .sc { color: #BA2121 }
54 | #chatbot .dl { color: #BA2121 }
55 | #chatbot .sd { color: #BA2121; font-style: italic }
56 | #chatbot .s2 { color: #BA2121 }
57 | #chatbot .se { color: #BB6622; font-weight: bold }
58 | #chatbot .sh { color: #BA2121 }
59 | #chatbot .si { color: #BB6688; font-weight: bold }
60 | #chatbot .sx { color: #008000 }
61 | #chatbot .sr { color: #BB6688 }
62 | #chatbot .s1 { color: #BA2121 }
63 | #chatbot .ss { color: #19177C }
64 | #chatbot .bp { color: #008000 }
65 | #chatbot .fm { color: #0000FF }
66 | #chatbot .vc { color: #19177C }
67 | #chatbot .vg { color: #19177C }
68 | #chatbot .vi { color: #19177C }
69 | #chatbot .vm { color: #19177C }
70 | #chatbot .il { color: #666666 }
71 | """)
72 | #.highlight { background: #f8f8f8; }
73 |
74 |
--------------------------------------------------------------------------------
/PaperOwl/serve/model_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import re
4 | import torch
5 | import transformers
6 | import traceback
7 |
8 | from queue import Queue
9 | from threading import Thread
10 |
11 |
12 | def post_process_output(text):
13 | text = text.strip()
14 | pattern = re.compile(
15 |         r"<unk>|<pad>|<s>|</s>|\[PAD\]|<\|endoftext\|>|\[UNK\]|\[CLS\]|\[MASK\]|<\|startofpiece\|>|<\|endofpiece\|>|\[gMASK\]|\[sMASK\]"
16 | )
17 | text = pattern.sub("", text.strip()).strip()
18 | return text
19 |
20 |
21 | def post_process_code(code):
22 | sep = "\n```"
23 | if sep in code:
24 | blocks = code.split(sep)
25 | if len(blocks) % 2 == 1:
26 | for i in range(1, len(blocks), 2):
27 | blocks[i] = blocks[i].replace("\\_", "_")
28 | code = sep.join(blocks)
29 | return code
30 |
31 |
32 | class Stream(transformers.StoppingCriteria):
33 | def __init__(self, callback_func=None):
34 | self.callback_func = callback_func
35 |
36 | def __call__(self, input_ids, scores) -> bool:
37 | if self.callback_func is not None:
38 | self.callback_func(input_ids[0])
39 | return False
40 |
41 |
42 | class Iteratorize:
43 |
44 | """
45 | Transforms a function that takes a callback
46 | into a lazy iterator (generator).
47 | """
48 |
49 | def __init__(self, func, kwargs={}, callback=None):
50 | self.mfunc = func
51 | self.c_callback = callback
52 | self.q = Queue()
53 | self.sentinel = object()
54 | self.kwargs = kwargs
55 | self.stop_now = False
56 |
57 | def _callback(val):
58 | if self.stop_now:
59 | raise ValueError
60 | self.q.put(val)
61 |
62 | def gentask():
63 | try:
64 | ret = self.mfunc(callback=_callback, **self.kwargs)
65 | except ValueError:
66 | pass
67 | except:
68 | traceback.print_exc()
69 | pass
70 |
71 | self.q.put(self.sentinel)
72 | if self.c_callback:
73 | self.c_callback(ret)
74 |
75 | self.thread = Thread(target=gentask)
76 | self.thread.start()
77 |
78 | def __iter__(self):
79 | return self
80 |
81 | def __next__(self):
82 | obj = self.q.get(True, None)
83 | if obj is self.sentinel:
84 | raise StopIteration
85 | else:
86 | return obj
87 |
88 | def __enter__(self):
89 | return self
90 |
91 | def __exit__(self, exc_type, exc_val, exc_tb):
92 | self.stop_now = True
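A self-contained sketch of how Iteratorize turns a callback-style producer into a generator; slow_producer below stands in for a model.generate() call that reports each decoded step through a Stream-style callback.

import time

def slow_producer(callback=None, n=3):
    for i in range(n):
        time.sleep(0.1)          # pretend to decode one step
        callback(f'token_{i}')   # fires once per step, like Stream.__call__
    return 'done'

with Iteratorize(slow_producer, kwargs={'n': 3}) as chunks:
    for chunk in chunks:
        print(chunk)             # token_0, token_1, token_2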
--------------------------------------------------------------------------------
/TinyChart/assets/Paper-Arxiv-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/TinyChart/assets/cases.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/assets/cases.png
--------------------------------------------------------------------------------
/TinyChart/assets/perform_and_speed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/assets/perform_and_speed.png
--------------------------------------------------------------------------------
/TinyChart/images/albums.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/albums.png
--------------------------------------------------------------------------------
/TinyChart/images/college.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/college.png
--------------------------------------------------------------------------------
/TinyChart/images/diseases.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/diseases.png
--------------------------------------------------------------------------------
/TinyChart/images/economy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/economy.png
--------------------------------------------------------------------------------
/TinyChart/images/immigrants.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/immigrants.png
--------------------------------------------------------------------------------
/TinyChart/images/market.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/market.png
--------------------------------------------------------------------------------
/TinyChart/images/sails.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/sails.png
--------------------------------------------------------------------------------
/TinyChart/images/sports.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/sports.png
--------------------------------------------------------------------------------
/TinyChart/images/workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/workers.png
--------------------------------------------------------------------------------
/TinyChart/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "tinyllava"
7 | version = "1.0.0"
8 | description = "A Framework of Small-scale Large Multimodal Models."
9 | readme = "README.md"
10 | requires-python = ">=3.9"
11 | classifiers = [
12 | "Programming Language :: Python :: 3",
13 | "License :: OSI Approved :: Apache Software License",
14 | ]
15 | dependencies = [
16 | "torch==2.0.1", "torchvision==0.15.2", "tiktoken",
17 | "transformers==4.37.2", "tokenizers==0.15.1", "sentencepiece==0.1.99", "shortuuid",
18 | "accelerate==0.21.0", "peft==0.4.0", "bitsandbytes==0.41.0",
19 | "pydantic<2,>=1", "markdown2[all]", "numpy", "scikit-learn==1.2.2",
20 | "gradio==3.35.2", "gradio_client==0.2.9",
21 | "requests", "httpx==0.24.0", "uvicorn", "fastapi",
22 | "einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13",
23 | ]
24 |
25 | [project.optional-dependencies]
26 | train = ["deepspeed==0.9.5", "ninja", "wandb"]
27 |
28 | [project.urls]
29 | "Homepage" = "https://github.com/X-PLUG/mPLUG-DocOwl/blob/main/TinyChart"
30 | "Bug Tracker" = "https://github.com/X-PLUG/mPLUG-DocOwl/issues"
31 |
32 | [tool.setuptools.packages.find]
33 | exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
34 |
35 | [tool.wheel]
36 | exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
37 |
38 |
--------------------------------------------------------------------------------
/TinyChart/scripts/convert_model_config.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 |
5 | def convert_config(path):
6 | config_path = path+'/config.json'
7 | config = json.load(open(config_path, 'r'))
8 | assert os.path.isdir(path+'/vision_tower')
9 | try:
10 | os.symlink(path+'/vision_tower', path+'/siglip')
11 | except:
12 | pass
13 | config['mm_vision_tower'] = path+'/siglip'
14 | json.dump(config, open(config_path, 'w'), indent=4, ensure_ascii=False)
15 |
16 | if __name__ == '__main__':
17 | parser = argparse.ArgumentParser()
18 | parser.add_argument('--input', type=str, required=True)
19 |
20 | args = parser.parse_args()
21 |
22 | if args.input[0] != '/':
23 | args.input = os.getcwd() + '/' + args.input
24 |
25 | if os.path.isdir(args.input+'/vision_tower'):
26 | convert_config(args.input)
--------------------------------------------------------------------------------
/TinyChart/scripts/evaluate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Variables
3 | MODEL_PATH=$1
4 | TEST_DATA_PATH=$2
5 |
6 | OUTPUT=${MODEL_PATH}/eval
7 | mkdir -p ${OUTPUT}
8 | cp scripts/evaluate.sh ${OUTPUT}/
9 |
10 |
11 | export PYTHONPATH=./
12 | export PYTHONHASHSEED=42
13 | export PYTHONUNBUFFERED=1
14 |
15 | num_chunks=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l)
16 | time_stamp=$(date +%Y%m%d-%H%M%S)
17 | TEMP_DIR=${OUTPUT}/temp_${time_stamp}
18 | mkdir -p ${TEMP_DIR}
19 |
20 | for ((chunk_idx=0; chunk_idx&1 | tee -a ${OUTPUT}/log.txt &
29 | done
30 | wait
31 |
32 | # Merge split && divide by dataset && calculate metric
33 | python scripts/merge_jsonl_sort.py \
34 | --input ${TEMP_DIR} \
35 | --output ${TEMP_DIR}/all.jsonl
36 | python scripts/split_jsonl_dataset.py \
37 | --input ${TEMP_DIR}/all.jsonl \
38 | --output ${OUTPUT}
39 | python tinychart/eval/run_eval.py \
40 | --input ${OUTPUT}
41 |
--------------------------------------------------------------------------------
/TinyChart/scripts/merge_jsonl_sort.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import argparse
4 |
5 | def read_jsonl(jsonl_path):
6 | with open(jsonl_path, 'r') as f:
7 | data = [json.loads(line) for line in f]
8 | return data
9 |
10 | def write_jsonl(data, jsonl_path):
11 | with open(jsonl_path, 'w', encoding='utf-8') as f:
12 | for item in data:
13 | f.write(json.dumps(item) + '\n')
14 |
15 | if __name__ == '__main__':
16 | parser = argparse.ArgumentParser()
17 | parser.add_argument('--input', default='temp/')
18 | parser.add_argument('--output', default='chartqa_val.json')
19 |
20 | args = parser.parse_args()
21 | files = os.listdir(args.input)
22 | files.sort()
23 | data = []
24 | for file in files:
25 | if file != 'all.jsonl':
26 | data.extend(read_jsonl(os.path.join(args.input, file)))
27 | # data.sort(key=lambda x: int(x['id'].split('_')[-1]))
28 | write_jsonl(data, args.output)
29 |
--------------------------------------------------------------------------------
/TinyChart/scripts/split_jsonl_dataset.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import argparse
4 | from collections import defaultdict
5 |
6 | def read_jsonl(jsonl_path):
7 | with open(jsonl_path, 'r') as f:
8 | data = [json.loads(line) for line in f]
9 | return data
10 |
11 | def write_jsonl(data, jsonl_path):
12 | with open(jsonl_path, 'w', encoding='utf-8') as f:
13 | for item in data:
14 | f.write(json.dumps(item) + '\n')
15 |
16 | if __name__ == '__main__':
17 | parser = argparse.ArgumentParser()
18 | parser.add_argument('--input', default='all.json')
19 | parser.add_argument('--output', default='./output/')
20 |
21 | args = parser.parse_args()
22 |
23 | all_data = read_jsonl(args.input)
24 |
25 | dataset2jsonl = defaultdict(list)
26 |
27 | for item in all_data:
28 | int_id = item['id'].split('_')[-1]
29 | dataset_name_split = '_'.join(item['id'].split('_')[:-1])
30 |
31 | if '-two_col-' in dataset_name_split:
32 | dataset_name_split = dataset_name_split.replace('-two_col-', '-')
33 | if '-multi_col-' in dataset_name_split:
34 | dataset_name_split = dataset_name_split.replace('-multi_col-', '-')
35 |
36 | dataset2jsonl[dataset_name_split].append(item)
37 |
38 | for dataset_name_split, data in dataset2jsonl.items():
39 | data.sort(key=lambda x: int(x['id'].split('_')[-1]))
40 | write_jsonl(data, os.path.join(args.output, f'{dataset_name_split}.jsonl'))
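
The grouping keys on everything before the last underscore of each record id, folding the -two_col-/-multi_col- ChartQA variants together. With a hypothetical id, the mapping from id to output file and sort key looks like this:

item = {'id': 'chartqa-two_col-test_42', 'model_answer': '12%'}

name = '_'.join(item['id'].split('_')[:-1]).replace('-two_col-', '-')
print(name)                            # chartqa-test  -> written to chartqa-test.jsonl
print(int(item['id'].split('_')[-1]))  # 42, used as the per-file sort key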
--------------------------------------------------------------------------------
/TinyChart/scripts/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | TRAIN_DATA=data/train.json
3 | TEST_DATA=data/test.json
4 |
5 | LLM_PATH=bczhou/TinyLLaVA-3.1B
6 | VIT_PATH=pretrained_models/TinyLLaVA-3.1B-SigLIP
7 |
8 | # # If you want to fine-tune TinyChart-3B-768:
9 | # LLM_PATH=mPLUG/TinyChart-3B-768
10 | # VIT_PATH=mPLUG/TinyChart-3B-768-siglip
11 |
12 | OUTPUT=./checkpoints/TinyChart-3B
13 |
14 | mkdir -p ${OUTPUT}
15 | # Copy the script to OUTPUT directory
16 | cp scripts/train.sh ${OUTPUT}/
17 |
18 | export PYTHONPATH=./
19 |
20 | if [ $MASTER_ADDR ];then
21 | echo $MASTER_ADDR
22 | echo $MASTER_PORT
23 | echo $WORLD_SIZE
24 | echo $RANK
25 | else
26 | MASTER_ADDR=127.0.0.1
27 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
28 | WORLD_SIZE=1
29 | RANK=0
30 | fi
31 | # Change for multinode config
32 | NNODES=${WORLD_SIZE}
33 | NODE_RANK=${RANK}
34 | GPUS_PER_NODE=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
35 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
36 |
37 | torchrun $DISTRIBUTED_ARGS \
38 | tinychart/train/train.py \
39 | --lora_enable False \
40 | --tune_vision_tower True \
41 | --tune_entire_model True \
42 | --tune_vit_from_layer -1 \
43 | --deepspeed scripts/zero3_offload_decay.json \
44 | --model_name_or_path ${LLM_PATH} \
45 | --vision_tower ${VIT_PATH} \
46 | --version v1 \
47 | --data_path ${TRAIN_DATA} \
48 | --image_folder '' \
49 | --mm_projector_type mlp2x_gelu \
50 | --mm_vision_select_layer -2 \
51 | --mm_use_im_start_end False \
52 | --mm_use_im_patch_token False \
53 | --image_aspect_ratio pad \
54 | --group_by_modality_length True \
55 | --fp16 True \
56 | --bf16 False \
57 | --output_dir ${OUTPUT} \
58 | --num_train_epochs 3 \
59 | --per_device_train_batch_size 8 \
60 | --per_device_eval_batch_size 4 \
61 | --gradient_accumulation_steps 2 \
62 | --evaluation_strategy "no" \
63 | --save_strategy "steps" \
64 | --save_steps 1000 \
65 | --save_total_limit 10 \
66 | --learning_rate 1e-4 \
67 | --weight_decay 0. \
68 | --warmup_ratio 0.03 \
69 | --lr_scheduler_type "cosine" \
70 | --logging_steps 1 \
71 | --tf32 False \
72 | --model_max_length 1024 \
73 | --gradient_checkpointing True \
74 | --dataloader_num_workers 4 \
75 | --lazy_preprocess True \
76 | --report_to tensorboard \
77 | 2>&1 | tee -a ${OUTPUT}/log.${RANK}.txt
78 |
79 | # Evaluate
80 | if [ $RANK -eq 0 ]; then
81 | python scripts/convert_model_config.py --input ${OUTPUT}
82 | bash scripts/evaluate.sh ${OUTPUT} ${TEST_DATA}
83 | fi
--------------------------------------------------------------------------------
/TinyChart/scripts/vit_add_tome.py:
--------------------------------------------------------------------------------
1 | import json
2 | import argparse
3 |
4 | def read_json(data_path):
5 | with open(data_path, 'r', encoding='utf-8') as f:
6 | data = json.load(f)
7 | return data
8 |
9 | def write_json(data, data_path):
10 | with open(data_path, 'w', encoding='utf-8') as f:
11 | json.dump(data, f, indent=4, ensure_ascii=False)
12 | return
13 |
14 | if __name__ == '__main__':
15 | parser = argparse.ArgumentParser()
16 | parser.add_argument('--path', type=str)
17 | parser.add_argument('--image_size', type=int, default=768)
18 | parser.add_argument('--tome_r', type=int, default=84)
19 |
20 | args = parser.parse_args()
21 |
22 | config = read_json(args.path+'/config.json')
23 | config['use_tome'] = True
24 | config['image_size'] = args.image_size
25 | config['tome_r'] = args.tome_r
26 | write_json(config, args.path+'/config.json')
27 |
28 |
--------------------------------------------------------------------------------
/TinyChart/scripts/zero3_offload_decay.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "zero_optimization": {
14 | "stage": 3,
15 | "offload_optimizer": {
16 | "device": "cpu",
17 | "pin_memory": true
18 | },
19 | "offload_param": {
20 | "device": "cpu",
21 | "pin_memory": true
22 | },
23 | "overlap_comm": true,
24 | "contiguous_gradients": true,
25 | "sub_group_size": 1e9,
26 | "reduce_bucket_size": "auto",
27 | "stage3_prefetch_bucket_size": "auto",
28 | "stage3_param_persistence_threshold": "auto",
29 | "stage3_max_live_parameters": 1e9,
30 | "stage3_max_reuse_distance": 1e9,
31 | "gather_16bit_weights_on_model_save": true
32 | },
33 | "gradient_accumulation_steps": "auto",
34 | "gradient_clipping": "auto",
35 | "train_batch_size": "auto",
36 | "train_micro_batch_size_per_gpu": "auto",
37 | "steps_per_print": 1e5,
38 | "wall_clock_breakdown": false
39 | }
--------------------------------------------------------------------------------
/TinyChart/tinychart/__init__.py:
--------------------------------------------------------------------------------
1 | from tinychart.model import *
2 |
--------------------------------------------------------------------------------
/TinyChart/tinychart/constants.py:
--------------------------------------------------------------------------------
1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30
2 | WORKER_HEART_BEAT_INTERVAL = 15
3 |
4 | LOGDIR = "."
5 |
6 | # Model Constants
7 | IGNORE_INDEX = -100
8 | IMAGE_TOKEN_INDEX = -200
9 | DEFAULT_IMAGE_TOKEN = "<image>"
10 | DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
11 | DEFAULT_IM_START_TOKEN = "<im_start>"
12 | DEFAULT_IM_END_TOKEN = "<im_end>"
13 | IMAGE_PLACEHOLDER = "<image-placeholder>"
14 |
--------------------------------------------------------------------------------
/TinyChart/tinychart/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/data/__init__.py
--------------------------------------------------------------------------------
/TinyChart/tinychart/data/preprocess/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/data/preprocess/__init__.py
--------------------------------------------------------------------------------
/TinyChart/tinychart/data/process.py:
--------------------------------------------------------------------------------
1 | import os
2 | import importlib
3 | from typing import Dict, Optional, Sequence, List
4 |
5 | import transformers
6 |
7 | from tinychart.constants import DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
8 | from tinychart import conversation as conversation_lib
9 | from tinychart.arguments import *
10 |
11 | PREPROCESS_REGISTRY = {}
12 |
13 | def register_preprocess(name):
14 | def register_preprocess_cls(cls):
15 | if name in PREPROCESS_REGISTRY:
16 | return PREPROCESS_REGISTRY[name]
17 |
18 | PREPROCESS_REGISTRY[name] = cls
19 | return cls
20 |
21 | return register_preprocess_cls
22 |
23 |
24 | def import_modules(modules_dir, namespace):
25 | for file in os.listdir(modules_dir):
26 | path = os.path.join(modules_dir, file)
27 |
28 | if (
29 | not file.startswith("_")
30 | and not file.startswith(".")
31 | and (file.endswith(".py") or os.path.isdir(path))
32 | ):
33 | module_name = file[: file.find(".py")] if file.endswith(".py") else file
34 | importlib.import_module(namespace + "." + module_name)
35 |
36 | models_dir = os.path.join(os.path.dirname(__file__), 'preprocess')
37 | import_modules(models_dir, "tinychart.data.preprocess")
38 |
39 |
40 | def PreprocessSelect(version):
41 | result = PREPROCESS_REGISTRY.get(version, None)
42 | if result is None:
43 | for name in PREPROCESS_REGISTRY.keys():
44 | if version in name:
45 | result = PREPROCESS_REGISTRY[name]
46 | break
47 | if result is None:
48 | result = PREPROCESS_REGISTRY['default']
49 | return result
50 |
51 |
52 |
53 | def preprocess_multimodal(
54 | sources: Sequence[str],
55 | data_args: DataArguments
56 | ) -> Dict:
57 | is_multimodal = data_args.is_multimodal
58 | if not is_multimodal:
59 | return sources
60 |
61 | for source in sources:
62 | for sentence in source:
63 | if DEFAULT_IMAGE_TOKEN in sentence['value']:
64 | sentence['value'] = sentence['value'].replace(DEFAULT_IMAGE_TOKEN, '').strip()
65 | sentence['value'] = DEFAULT_IMAGE_TOKEN + '\n' + sentence['value']
66 | sentence['value'] = sentence['value'].strip()
67 | if "mmtag" in conversation_lib.default_conversation.version:
68 | sentence['value'] = sentence['value'].replace(DEFAULT_IMAGE_TOKEN,
69 |                                                               '<Image>' + DEFAULT_IMAGE_TOKEN + '</Image>')
70 | replace_token = DEFAULT_IMAGE_TOKEN
71 | if data_args.mm_use_im_start_end:
72 | replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN
73 | sentence["value"] = sentence["value"].replace(DEFAULT_IMAGE_TOKEN, replace_token)
74 |
75 | return sources
76 |
77 |
78 | def preprocess(
79 | sources: Sequence[str],
80 | tokenizer: transformers.PreTrainedTokenizer,
81 | has_image: bool = False
82 | ) -> Dict:
83 | return PreprocessSelect(conversation_lib.default_conversation.version)(sources, tokenizer, has_image)
84 |
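A sketch of the token rewriting done by preprocess_multimodal(), using a hypothetical single-turn sample and a SimpleNamespace stand-in for DataArguments; with the default (non-mmtag) conversation version, the net effect is to move the <image> token to the front of the turn that contains it.

from types import SimpleNamespace

data_args = SimpleNamespace(is_multimodal=True, mm_use_im_start_end=False)
sources = [[
    {'from': 'human', 'value': 'What does this chart show? <image>'},
    {'from': 'gpt', 'value': 'It compares quarterly revenue.'},
]]

out = preprocess_multimodal(sources, data_args)
print(out[0][0]['value'])
# <image>
# What does this chart show?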
--------------------------------------------------------------------------------
/TinyChart/tinychart/eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/eval/__init__.py
--------------------------------------------------------------------------------
/TinyChart/tinychart/eval/eval_chart2text.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import sys
4 | import sacrebleu
5 | import numpy as np
6 |
7 | def chart2text_evaluator(data, temp_dir='/output/temp'):
8 | if temp_dir[-1] == '/':
9 | temp_dir = temp_dir[:-1]
10 | cands = []
11 | refs = []
12 | for item in data:
13 | cands.append(item['model_answer'])
14 | refs.append(item['gt_answer'])
15 |
16 | bleu = sacrebleu.corpus_bleu(cands, [refs], lowercase=True).score
17 |
18 | return bleu
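
A tiny usage sketch with hand-made records; the two keys mirror the ones read above, and the return value is corpus-level BLEU from sacrebleu on a 0-100 scale.

data = [
    {'model_answer': 'Revenue grew steadily from 2010 to 2020.',
     'gt_answer':    'Revenue grew steadily from 2010 to 2020.'},
    {'model_answer': 'Sales peaked in 2015.',
     'gt_answer':    'Sales reached their peak in 2015.'},
]
print(chart2text_evaluator(data))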
--------------------------------------------------------------------------------
/TinyChart/tinychart/model/__init__.py:
--------------------------------------------------------------------------------
1 | from tinychart.model.language_model.llava_phi import TinyChartPhiForCausalLM, TinyChartPhiConfig
--------------------------------------------------------------------------------
/TinyChart/tinychart/model/language_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/model/language_model/__init__.py
--------------------------------------------------------------------------------
/TinyChart/tinychart/model/model_factory.py:
--------------------------------------------------------------------------------
1 | import os
2 | import importlib
3 |
4 | MODEL_REGISTRY = {}
5 | TOKENIZER_REGISTRY = {}
6 |
7 |
8 | def ModelSelect(model_name_or_path):
9 | model = None
10 | for name in MODEL_REGISTRY.keys():
11 | if name in model_name_or_path.lower():
12 | model = MODEL_REGISTRY[name]
13 | if model is None:
14 | model = MODEL_REGISTRY['llama']
15 | return model
16 |
17 |
18 | def TokenizerSelect(model_name_or_path):
19 | tokenizer_init = None
20 | for name in TOKENIZER_REGISTRY.keys():
21 | if name in model_name_or_path.lower():
22 | tokenizer_init = TOKENIZER_REGISTRY[name]
23 | if tokenizer_init is None:
24 | tokenizer_init = TOKENIZER_REGISTRY['llama']
25 | return tokenizer_init
26 |
27 |
28 | def register_model(name):
29 | def register_model_cls(cls):
30 | if name in MODEL_REGISTRY:
31 | return MODEL_REGISTRY[name]
32 |
33 | MODEL_REGISTRY[name] = cls
34 | return cls
35 |
36 | return register_model_cls
37 |
38 |
39 | def register_tokenizer(name):
40 | def register_tokenizer_cls(cls):
41 | if name in TOKENIZER_REGISTRY:
42 | return TOKENIZER_REGISTRY[name]
43 |
44 | TOKENIZER_REGISTRY[name] = cls
45 | return cls
46 |
47 | return register_tokenizer_cls
48 |
49 |
50 | def import_models(models_dir, namespace):
51 | for file in os.listdir(models_dir):
52 | path = os.path.join(models_dir, file)
53 | if (
54 | not file.startswith("_")
55 | and not file.startswith(".")
56 | and file.endswith(".py")
57 | ):
58 | model_name = file[: file.find(".py")] if file.endswith(".py") else file
59 | importlib.import_module(namespace + "." + model_name)
60 |
61 |
62 | # automatically import any Python files in the models/ directory
63 | models_dir = os.path.join(os.path.dirname(__file__), 'language_model')
64 | import_models(models_dir, "tinychart.model.language_model")
65 |
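How a module under language_model/ would hook itself into the registry. The class names and checkpoint path below are hypothetical; only the decorator usage and the substring lookup in ModelSelect() follow the code above.

from tinychart.model.model_factory import ModelSelect, register_model, register_tokenizer

@register_model('phi')
class MyPhiForCausalLM:        # the real classes extend Hugging Face model classes
    ...

@register_tokenizer('phi')
class MyPhiTokenizerInit:
    ...

model_cls = ModelSelect('checkpoints/TinyChart-phi-3b')  # -> MyPhiForCausalLM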
--------------------------------------------------------------------------------
/TinyChart/tinychart/model/multimodal_encoder/builder.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tinychart.model.multimodal_encoder.siglip_encoder import SigLipVisionTower
3 |
4 | def build_vision_tower(vision_tower_cfg, **kwargs):
5 | vision_tower = getattr(vision_tower_cfg, 'mm_vision_tower', getattr(vision_tower_cfg, 'vision_tower', None))
6 | is_absolute_path_exists = os.path.exists(vision_tower)
7 | return SigLipVisionTower(vision_tower, vision_tower_cfg, **kwargs)
--------------------------------------------------------------------------------
/TinyChart/tinychart/train/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/train/__init__.py
--------------------------------------------------------------------------------
/UReader/.gitattributes:
--------------------------------------------------------------------------------
1 | *.py eol=lf
2 | *.rst eol=lf
3 | *.md eol=lf
4 | *.mdx eol=lf
--------------------------------------------------------------------------------
/UReader/.gitignore:
--------------------------------------------------------------------------------
1 | evaluate_results*
2 | checkpoints/
3 | benchmark_files/
4 | ureader_images
5 | ureader_json
6 | ureader_images/
7 | ureader_json/
8 | # Initially taken from Github's Python gitignore file
9 | tensorboard/*
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 | .ossutil_*
15 | # C extensions
16 | *.so
17 |
18 | # tests and logs
19 | tests/fixtures/cached_*_text.txt
20 | logs/
21 | lightning_logs/
22 | lang_code_data/
23 |
24 | # Distribution / packaging
25 | .Python
26 | build/
27 | develop-eggs/
28 | dist/
29 | downloads/
30 | eggs/
31 | .eggs/
32 | lib/
33 | lib64/
34 | parts/
35 | sdist/
36 | var/
37 | wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 | MANIFEST
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .nox/
57 | .coverage
58 | .coverage.*
59 | .cache
60 | nosetests.xml
61 | coverage.xml
62 | *.cover
63 | .hypothesis/
64 | .pytest_cache/
65 |
66 | # Translations
67 | *.mo
68 | *.pot
69 |
70 | # Django stuff:
71 | *.log
72 | local_settings.py
73 | db.sqlite3
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | target/
87 |
88 | # Jupyter Notebook
89 | .ipynb_checkpoints
90 |
91 | # IPython
92 | profile_default/
93 | ipython_config.py
94 |
95 | # pyenv
96 | .python-version
97 |
98 | # celery beat schedule file
99 | celerybeat-schedule
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # vscode
132 | .vs
133 | .vscode
134 |
135 | # Pycharm
136 | .idea
137 |
138 | # TF code
139 | tensorflow_code
140 |
141 | # Models
142 | proc_data
143 |
144 | # examples
145 | runs
146 | /runs_old
147 | /wandb
148 | /output
149 | /configs_dev
150 | /scripts_dev
151 | # /examples/runs
152 | # /examples/**/*.args
153 | # /examples/rag/sweep
154 |
155 | # data
156 | /data
157 | serialization_dir
158 |
159 | # emacs
160 | *.*~
161 | debug.env
162 |
163 | # vim
164 | .*.swp
165 |
166 | #ctags
167 | tags
168 |
169 | # pre-commit
170 | .pre-commit*
171 |
172 | # .lock
173 | *.lock
174 |
175 | # DS_Store (MacOS)
176 | .DS_Store
177 |
178 | # ruff
179 | .ruff_cache
180 |
--------------------------------------------------------------------------------
/UReader/app.py:
--------------------------------------------------------------------------------
1 | import os
2 | # import wget
3 | # resources = os.getenv('resources_new')
4 | # resources_filename = wget.download(resources)
5 |
6 | # os.system('tar zxvf {}'.format(resources_filename))
7 |
8 | # os.system('ls -l')
9 |
10 | import argparse
11 | import datetime
12 | import json
13 | import os
14 | import time
15 | import torch
16 |
17 | import gradio as gr
18 | import requests
19 | from pipeline.utils import add_config_args, set_args
20 | from sconf import Config
21 |
22 |
23 | if __name__ == "__main__":
24 | from serve.serve_utils import init
25 | io = init()
26 | cur_dir = os.path.dirname(os.path.abspath(__file__))
27 | log_dir = cur_dir[:-9] + "log"
28 |
29 | parser = argparse.ArgumentParser()
30 | parser.add_argument("--host", type=str, default="0.0.0.0")
31 | parser.add_argument("--debug", action="store_true", help="using debug mode")
32 | parser.add_argument("--port", type=int)
33 | parser.add_argument("--concurrency-count", type=int, default=100)
34 | parser.add_argument("--base-model",type=str, default='checkpoints/ureader')
35 | parser.add_argument("--load-8bit", action="store_true", help="using 8bit mode")
36 | parser.add_argument("--bf16", action="store_true", help="using 8bit mode")
37 | parser.add_argument("--mm_config", type=str, default='configs/sft/release.yaml')
38 | args = parser.parse_args()
39 | config = Config(args.mm_config)
40 | add_config_args(config, args)
41 | set_args(args)
42 | if torch.cuda.is_available():
43 | device = "cuda"
44 | else:
45 | device = "cpu"
46 | from serve.web_server import mPLUG_Owl_Server, build_demo
47 | model = mPLUG_Owl_Server(
48 | base_model=args.base_model,
49 | log_dir=log_dir,
50 | load_in_8bit=args.load_8bit,
51 | bf16=args.bf16,
52 | device=device,
53 | io=io,
54 | config=config
55 | )
56 | demo = build_demo(model)
57 | demo.queue(concurrency_count=args.concurrency_count, status_update_rate=10, api_open=False).launch(server_name=args.host, debug=args.debug, server_port=args.port, share=False)
--------------------------------------------------------------------------------
/UReader/assets/-twitter-blue.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/UReader/assets/Demo-ModelScope-brightgreen.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/UReader/assets/LICENSE-Apache License-blue.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/UReader/assets/Paper-Arxiv-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/UReader/assets/Paper-PDF-orange.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/UReader/assets/intro_case.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/assets/intro_case.jpg
--------------------------------------------------------------------------------
/UReader/assets/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/assets/model.png
--------------------------------------------------------------------------------
/UReader/assets/modelscope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/assets/modelscope.png
--------------------------------------------------------------------------------
/UReader/assets/modelscopeIcon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/UReader/configs/sft/release.yaml:
--------------------------------------------------------------------------------
1 | # [ [1, 1],
2 | # [1, 2], [2, 1],
3 | # [3, 1], [1, 3],
4 | # [2, 2], [4, 1], [1, 4],
5 | # [5, 1], [1, 5],
6 | # [1, 6], [6, 1],
7 | # [7, 1], [1, 7],
8 | # [4, 2], [2, 4], [1, 8], [8, 1],
9 | # [1, 9], [3, 3], [9, 1],
10 | # [6, 2], [2, 6],
11 | # [2, 8], [8, 2], [4, 4],
12 | # [3, 6], [6, 3],
13 | # [10, 2], [2, 10],
14 | # [12, 2], [2, 12],
15 | # [5, 5],
16 | # [3, 9], [9, 3],
17 | # [2, 14], [14, 2],
18 | # [4, 8], [2, 16], [8, 4], [16, 2],
19 | # [12, 3], [3, 12], [18, 2], [2, 18], [6, 6]]
20 | train_processors: {
21 | sft: {type: 'DocNewMultiScaleSFTProcessor', image_size: 224,
22 | anchors: [[1, 1], [1, 2], [2, 1], [1, 3], [3, 1], [1, 4], [2, 2], [4, 1], [1, 5], [5, 1], [1, 6], [2, 3], [3, 2], [6, 1], [1, 7], [7, 1], [1, 8], [2, 4], [4, 2], [8, 1], [1, 9], [3, 3], [9, 1], [1, 10], [2, 5], [5, 2], [10, 1], [1, 11], [11, 1], [2, 6], [3, 4], [4, 3], [6, 2], [2, 7], [7, 2], [3, 5], [5, 3], [2, 8], [4, 4], [8, 2], [2, 9], [3, 6], [6, 3], [9, 2], [2, 10], [4, 5], [5, 4], [10, 2]]}
23 | }
24 |
25 | valid_processors: {
26 | sft: {type: 'DocNewMultiScaleSFTProcessor', image_size: 224,
27 | anchors: [[1, 1], [1, 2], [2, 1], [1, 3], [3, 1], [1, 4], [2, 2], [4, 1], [1, 5], [5, 1], [1, 6], [2, 3], [3, 2], [6, 1], [1, 7], [7, 1], [1, 8], [2, 4], [4, 2], [8, 1], [1, 9], [3, 3], [9, 1], [1, 10], [2, 5], [5, 2], [10, 1], [1, 11], [11, 1], [2, 6], [3, 4], [4, 3], [6, 2], [2, 7], [7, 2], [3, 5], [5, 3], [2, 8], [4, 4], [8, 2], [2, 9], [3, 6], [6, 3], [9, 2], [2, 10], [4, 5], [5, 4], [10, 2]]}
28 | }
29 |
30 | data_files: [
31 | 'ureader_json/train.jsonl',
32 | 'ureader_json/val.jsonl'
33 | ]
34 | # 654840
35 |
36 | patch_pos_embed_type: post
37 |
38 |
--------------------------------------------------------------------------------
/UReader/ds_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "zero_optimization": {
14 | "stage": 1
15 | },
16 | "train_batch_size": "auto",
17 | "train_micro_batch_size_per_gpu": "auto"
18 | }
--------------------------------------------------------------------------------
/UReader/examples/Yao_Ming.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/Yao_Ming.jpeg
--------------------------------------------------------------------------------
/UReader/examples/ca.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/ca.jpeg
--------------------------------------------------------------------------------
/UReader/examples/docowl.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/docowl.jpg
--------------------------------------------------------------------------------
/UReader/examples/fridge.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/fridge.jpg
--------------------------------------------------------------------------------
/UReader/examples/fruits.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/fruits.jpg
--------------------------------------------------------------------------------
/UReader/examples/laundry.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/laundry.jpeg
--------------------------------------------------------------------------------
/UReader/examples/monalisa-fun.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/monalisa-fun.jpg
--------------------------------------------------------------------------------
/UReader/examples/monday.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/monday.jpg
--------------------------------------------------------------------------------
/UReader/examples/mug_ad.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/mug_ad.jpeg
--------------------------------------------------------------------------------
/UReader/examples/owl.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/owl.jpg
--------------------------------------------------------------------------------
/UReader/examples/rap.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/rap.jpeg
--------------------------------------------------------------------------------
/UReader/examples/table.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/table.jpg
--------------------------------------------------------------------------------
/UReader/examples/titanic.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/titanic.jpeg
--------------------------------------------------------------------------------
/UReader/examples/vga.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/vga.jpeg
--------------------------------------------------------------------------------
/UReader/examples/website.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/website.jpg
--------------------------------------------------------------------------------
/UReader/mplug_owl/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from typing import TYPE_CHECKING
15 |
16 | from transformers.utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
17 |
18 |
19 | _import_structure = {
20 | "configuration_mplug_owl": ["MPLUG_OWL_PRETRAINED_CONFIG_ARCHIVE_MAP", "MplugOwlConfig"],
21 | "processing_mplug_owl": ["MplugOwlImageProcessor", "MplugOwlProcessor"],
22 | "tokenization_mplug_owl": ["MplugOwlTokenizer"],
23 | }
24 |
25 | try:
26 | if not is_tokenizers_available():
27 | raise OptionalDependencyNotAvailable()
28 | except OptionalDependencyNotAvailable:
29 | pass
30 |
31 |
32 | try:
33 | if not is_torch_available():
34 | raise OptionalDependencyNotAvailable()
35 | except OptionalDependencyNotAvailable:
36 | pass
37 | else:
38 | _import_structure["modeling_mplug_owl"] = [
39 | "MPLUG_OWL_PRETRAINED_MODEL_ARCHIVE_LIST",
40 | "MplugOwlForConditionalGeneration",
41 | "MplugOwlModel",
42 | ]
43 |
44 |
45 | if TYPE_CHECKING:
46 | from .configuration_mplug_owl import MPLUG_OWL_PRETRAINED_CONFIG_ARCHIVE_MAP, MplugOwlConfig
47 | from .tokenization_mplug_owl import MplugOwlTokenizer
48 |
49 | try:
50 | if not is_tokenizers_available():
51 | raise OptionalDependencyNotAvailable()
52 | except OptionalDependencyNotAvailable:
53 | pass
54 |
55 | try:
56 | if not is_torch_available():
57 | raise OptionalDependencyNotAvailable()
58 | except OptionalDependencyNotAvailable:
59 | pass
60 | else:
61 | from .modeling_mplug_owl import (
62 | MPLUG_OWL_PRETRAINED_MODEL_ARCHIVE_LIST,
63 | MplugOwlForConditionalGeneration,
64 | MplugOwlModel,
65 | MplugOwlPreTrainedModel,
66 | )
67 |
68 |
69 | else:
70 | import sys
71 |
72 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
73 |
74 | from .configuration_mplug_owl import *
75 | from .modeling_mplug_owl import *
76 | from .processing_mplug_owl import *
77 | from .tokenization_mplug_owl import *
78 |
--------------------------------------------------------------------------------
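The __init__.py above wires the package through transformers' _LazyModule, so the heavy submodules are only imported when one of their attributes is first accessed. A minimal sketch of the resulting import surface, assuming the UReader directory is on PYTHONPATH and that torch and tokenizers are installed so the torch-gated entries are registered:

from mplug_owl import MplugOwlConfig, MplugOwlTokenizer
from mplug_owl import MplugOwlForConditionalGeneration

# _LazyModule resolves these names on first access, importing
# configuration_mplug_owl / tokenization_mplug_owl / modeling_mplug_owl lazily.
print(MplugOwlConfig.__module__)
print(MplugOwlForConditionalGeneration.__module__)
--------------------------------------------------------------------------------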
/UReader/mplug_owl/tokenization_mplug_owl.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2022 x-plug and The HuggingFace Inc. team. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Tokenization classes for MplugOwl."""
16 |
17 | from transformers.utils import logging
18 | from transformers.models.llama.tokenization_llama import LlamaTokenizer
19 |
20 |
21 | logger = logging.get_logger(__name__)
22 |
23 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
24 |
25 | PRETRAINED_VOCAB_FILES_MAP = {
26 | "vocab_file": {
27 | "MAGAer13/mplug-owl-llama-7b": "https://huggingface.co/MAGAer13/mplug-owl-llama-7b/resolve/main/vocab.txt",
28 | },
29 | }
30 |
31 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
32 | "MAGAer13/mplug-owl-llama-7b": 1024,
33 | }
34 |
35 |
36 | class MplugOwlTokenizer(LlamaTokenizer):
37 | def __init__(
38 | self,
39 | vocab_file,
40 |         unk_token="<unk>",
41 |         bos_token="<s>",
42 |         eos_token="</s>",
43 |         pad_token="<unk>",
44 | sp_model_kwargs=None,
45 | add_bos_token=False,
46 | add_eos_token=False,
47 | clean_up_tokenization_spaces=False,
48 | **kwargs,
49 | ):
50 | super().__init__(
51 | vocab_file,
52 | unk_token,
53 | bos_token,
54 | eos_token,
55 | pad_token,
56 | sp_model_kwargs,
57 | add_bos_token,
58 | add_eos_token,
59 | clean_up_tokenization_spaces,
60 | **kwargs,
61 | )
62 | self.eod_id = self.eos_token_id
63 |
--------------------------------------------------------------------------------
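A minimal usage sketch for MplugOwlTokenizer above, assuming network access to the Hugging Face hub and an installed sentencepiece; the checkpoint name is the one referenced in PRETRAINED_VOCAB_FILES_MAP, and the prompt string is purely illustrative:

from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer

tokenizer = MplugOwlTokenizer.from_pretrained("MAGAer13/mplug-owl-llama-7b")
encoded = tokenizer("Human: What is shown in the image?\nAI:", return_tensors="pt")
print(encoded["input_ids"].shape)
print(tokenizer.eod_id == tokenizer.eos_token_id)  # True, set in __init__ above
--------------------------------------------------------------------------------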
/UReader/pipeline/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/pipeline/__init__.py
--------------------------------------------------------------------------------
/UReader/pipeline/data_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .processors.builder import build_processors
2 | from .xgpt3_dataset import MultiModalDataset
3 |
4 | def train_valid_test_datasets_provider(data_path, config, tokenizer, seq_length=1024,image_root='ureader_images'):
5 | """Build train and valid datasets."""
6 | print('> building train and validation datasets for mPLUG-Owl ...')
7 | train_ds, valid_ds = build_train_valid_test_datasets(
8 | input_file=data_path,
9 | tokenizer=tokenizer,
10 | max_length=seq_length,
11 | config=config,
12 | image_root=image_root)
13 | print("> finished creating mPLUG-Owl datasets ...")
14 |
15 | return train_ds, valid_ds
16 |
17 | def build_train_valid_test_datasets(input_file, tokenizer, max_length=80, config=None,image_root='ureader_images'):
18 | train_processors = build_processors(config['train_processors'])
19 | valid_processors = build_processors(config['valid_processors'])
20 | if isinstance(input_file, dict):
21 | train_ds = MultiModalDataset(input_file['train'][0], tokenizer, train_processors, max_length, image_root=image_root)
22 | valid_ds = {name: MultiModalDataset(ds, tokenizer, valid_processors, max_length) for name,ds in input_file['valid'].items()}
23 | test_ds = None
24 |
25 | else:
26 |         assert len(input_file) == 2  # If you have more than 2 files, modify the code here or merge them into train and dev
27 | train_ds = MultiModalDataset(input_file[0], tokenizer, train_processors, max_length)
28 | valid_ds = MultiModalDataset(input_file[1], tokenizer, valid_processors, max_length)
29 | test_ds = None
30 | return (train_ds, valid_ds)
31 |
--------------------------------------------------------------------------------
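A hedged sketch of calling train_valid_test_datasets_provider above; the YAML config path and checkpoint directory follow paths used elsewhere in the repo, while the jsonl annotation files and their keys are placeholders, not files shipped with the repository. The config must define the train_processors and valid_processors sections consumed by build_processors.

from sconf import Config
from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
from pipeline.data_utils import train_valid_test_datasets_provider

config = Config("configs/sft/release.yaml")
tokenizer = MplugOwlTokenizer.from_pretrained("checkpoints/mplug-owl-llama-7b")

# The dict form is routed to the first branch of build_train_valid_test_datasets:
# one training jsonl plus a dict of named validation jsonl files.
data_files = {
    "train": ["data/train.jsonl"],                 # placeholder path
    "valid": {"docvqa": "data/val_docvqa.jsonl"},  # placeholder path
}
train_ds, valid_ds = train_valid_test_datasets_provider(
    data_files, config=config, tokenizer=tokenizer,
    seq_length=2048, image_root="ureader_images")
--------------------------------------------------------------------------------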
/UReader/pipeline/data_utils/processors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Alibaba. All rights reserved.
2 | from .builder import PROCESSORS, build_processors
3 | from .default_processor import DefaultProcessor
4 | from .caption_processor import CaptionProcessor
5 | from .doc_processor import DocPretrainProcessor, DocSFTProcessor
6 | __all__ = [
7 | 'PROCESSORS', 'build_processors',
8 | 'DefaultProcessor', 'CaptionProcessor',
9 | 'DocPretrainProcessor', 'DocSFTProcessor'
10 | ]
--------------------------------------------------------------------------------
/UReader/pipeline/data_utils/processors/builder.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from icecream import ic
4 | from pipeline.data_utils.registry import Registry, build_from_cfg
5 | # from .data_utils.registry import Registry, build_from_cfg
6 |
7 | PROCESSORS = Registry('processors')
8 |
9 | def build_processors(processors_cfg):
10 | processors = dict()
11 | for task, processor in processors_cfg.items():
12 | processors[task] = build_from_cfg(processor, PROCESSORS)
13 | ic(type(processors[task]))
14 | return processors
15 |
--------------------------------------------------------------------------------
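build_processors maps each task name to an instance built via build_from_cfg against the PROCESSORS registry. A minimal sketch, assuming the registry follows the usual convention of a 'type' key naming the registered class with the remaining entries passed as keyword arguments (the shape the YAML processor configs would take):

from pipeline.data_utils.processors.builder import build_processors

processors_cfg = {
    "sft": {"type": "DefaultProcessor", "image_size": 224},
    "caption": {"type": "CaptionProcessor", "image_size": 224, "randaug": True},
}
processors = build_processors(processors_cfg)   # ic() prints each built type
print(processors["sft"].image_size)             # 224
--------------------------------------------------------------------------------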
/UReader/pipeline/data_utils/processors/caption_processor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision import transforms
3 | from PIL import Image
4 | import random
5 |
6 | from pipeline.data_utils.randaugment import RandomAugment
7 | from .builder import PROCESSORS
8 |
9 |
10 | @PROCESSORS.register_module()
11 | class CaptionProcessor:
12 | def __init__(self, image_size=224, min_scale = 0.5, randaug=False):
13 | self.image_size = image_size
14 | self.min_scale = min_scale
15 |
16 | if randaug:
17 | self.image_transform = transforms.Compose([
18 | transforms.RandomResizedCrop(image_size,scale=(min_scale, 1.0), interpolation=Image.BICUBIC),
19 | transforms.RandomHorizontalFlip(),
20 | RandomAugment(2,7,isPIL=True,augs=['Identity','AutoContrast','Equalize','Brightness','Sharpness',
21 | 'ShearX', 'ShearY', 'TranslateX', 'TranslateY', 'Rotate']),
22 | transforms.ToTensor(),
23 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
24 | ])
25 | else:
26 | self.image_transform = transforms.Compose([
27 | transforms.RandomResizedCrop(image_size,scale=(min_scale, 1.0), interpolation=Image.BICUBIC),
28 | transforms.RandomHorizontalFlip(),
29 | transforms.ToTensor(),
30 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
31 | ])
32 | self.text_transform = None
33 |
34 | def __call__(self, image, text):
35 | assert image or text
36 |
37 | if image:
38 | image_input = self.image_transform(image)
39 | else:
40 | image_input = None
41 |
42 | if text:
43 | if isinstance(text["prompt"], list):
44 | prompt = random.choice(text["prompt"])
45 | else:
46 | prompt = text["prompt"]
47 | text_input = dict(
48 | prompt=prompt,
49 | completion=text["text"],
50 | )
51 | else:
52 | text_input = None
53 | return image_input, text_input
--------------------------------------------------------------------------------
/UReader/pipeline/data_utils/processors/default_processor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision import transforms
3 | from PIL import Image
4 | import random
5 |
6 | from pipeline.data_utils.randaugment import RandomAugment
7 | from .builder import PROCESSORS
8 |
9 |
10 | @PROCESSORS.register_module()
11 | class DefaultProcessor:
12 | def __init__(self, image_size=224):
13 | self.image_size = image_size
14 |
15 | self.image_transform = transforms.Compose([
16 | transforms.Resize((image_size, image_size),interpolation=Image.BICUBIC),
17 | transforms.ToTensor(),
18 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
19 | ])
20 |
21 | self.text_transform = None
22 |
23 | def __call__(self, image, text):
24 | assert image or text
25 |
26 | if image:
27 | image_input = self.image_transform(image)
28 | else:
29 | image_input = None
30 |
31 | if text:
32 | if isinstance(text["prompt"], list):
33 | prompt = random.choice(text["prompt"])
34 | else:
35 | prompt = text["prompt"]
36 | text_input = dict(
37 | prompt=prompt,
38 | completion=text["text"],
39 | )
40 | else:
41 | text_input = None
42 | return image_input, text_input
--------------------------------------------------------------------------------
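A small usage sketch of DefaultProcessor on one of the example images shipped under UReader/examples (run from the UReader directory); the prompt/completion strings are illustrative, but the dict keys ('prompt', 'text') match what __call__ expects above:

from PIL import Image
from pipeline.data_utils.processors.default_processor import DefaultProcessor

processor = DefaultProcessor(image_size=224)
image = Image.open("examples/fruits.jpg").convert("RGB")
text = {"prompt": "Human: What fruit is shown?\nAI:", "text": "A pile of oranges."}

image_input, text_input = processor(image, text)
print(image_input.shape)            # torch.Size([3, 224, 224])
print(text_input["completion"])     # the raw answer string
--------------------------------------------------------------------------------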
/UReader/pipeline/eval_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/pipeline/eval_utils/__init__.py
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | from .__main__ import cli_main
2 | from .due_evaluator import DueEvaluator
3 |
4 | __all__ = ['DueEvaluator', 'cli_main']
5 |
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import sys
6 | from typing import Optional, Set
7 | import json
8 |
9 | from .due_evaluator import DueEvaluator
10 | from .utils import property_scores_to_string
11 |
12 |
13 | def parse_args():
14 | """Parse CLI arguments.
15 |
16 | Returns:
17 | namespace: namespace with parsed variables.
18 |
19 | """
20 | parser = argparse.ArgumentParser('Document Understanding Evaluator')
21 | parser.add_argument(
22 | '--out-files',
23 | '-o',
24 | type=argparse.FileType('r', encoding='utf-8'),
25 | required=True,
26 | nargs='+',
27 | help='Out file to evaluate',
28 | )
29 | parser.add_argument(
30 | '--reference', '-r', type=argparse.FileType('r', encoding='utf-8'), required=True, help='Reference file',
31 | )
32 | parser.add_argument('--metric', '-m', type=str, default='F1', choices=['F1', 'MEAN-F1', 'ANLS', 'WTQ', 'GROUP-ANLS'])
33 | parser.add_argument(
34 | '--return-score',
35 | default='F1',
36 | choices=['F1', 'mean-F1', 'ANLS', 'mean-Precision', 'mean-Recall', 'WTQ'],
37 | help='Return WR-like mean-F1 score',
38 | )
39 |     parser.add_argument('--line-by-line', action='store_true', default=False, help='Return per-example results')
40 | parser.add_argument(
41 | '--columns', type=str, nargs='+', default=['Precision', 'Recall', 'F1'], help='Columns',
42 | )
43 | parser.add_argument(
44 | '--print-format',
45 | default='text',
46 | type=str,
47 | choices=['text', 'latex', 'json'],
48 | help='Print feature table in the given format',
49 | )
50 |     parser.add_argument('--properties', nargs='+', type=str, help='Property set to limit evaluation to')
51 |     parser.add_argument(
52 |         '--ignore-case', '-i', action='store_true', default=False, help='Ignore letter case when comparing values',
53 |     )
54 | return parser.parse_args()
55 |
56 |
57 | def cli_main(args: argparse.Namespace):
58 | """CLI main.
59 |
60 | Args:
61 | args: cli arguments
62 | """
63 | reference = [json.loads(line) for line in args.reference]
64 |
65 | evaluators = []
66 | for out_file in args.out_files:
67 | predictions = [json.loads(line) for line in out_file]
68 |
69 | property_set: Optional[Set[str]]
70 | if args.properties:
71 | property_set = args.properties
72 | else:
73 | property_set = None
74 |
75 | evaluators.append(
76 | DueEvaluator(reference, predictions, property_set, args.ignore_case, out_file.name, args.metric)
77 | )
78 |
79 | prop_str = property_scores_to_string(evaluators, args.print_format, args.columns)
80 | if args.print_format != 'json':
81 | print(prop_str, file=sys.stderr)
82 |
83 | if args.line_by_line:
84 | for idx, score in enumerate(evaluators[0].line_by_line()):
85 | print(f'{idx}: {score}', file=sys.stderr)
86 | return prop_str
87 |
88 |
89 | def main() -> None:
90 | """Main."""
91 | args = parse_args()
92 | cli_main(args)
93 |
94 |
95 | if __name__ == '__main__':
96 | main()
97 |
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/__version__.py:
--------------------------------------------------------------------------------
1 | """Version specification."""
2 |
3 | VERSION = (0, 0, 8)
4 | __version__ = '.'.join(map(str, VERSION))
5 |
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/pipeline/eval_utils/due_evaluator/py.typed
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/scorers/__init__.py:
--------------------------------------------------------------------------------
1 | from .anls_scorer import AnlsScorer
2 | from .base_scorer import BaseScorer
3 | from .fscorer import FScorer
4 | from .mean_fscorer import MeanFScorer
5 | from .wtq_scorer import WtqScorer
6 | from .group_anls import GroupAnlsScorer
7 | from .geval_scorer import GevalScorer
8 |
9 | __all__ = ['AnlsScorer', 'BaseScorer', 'FScorer', 'MeanFScorer', 'WtqScorer', 'GevalScorer', 'GroupAnlsScorer']
10 |
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/scorers/accuracy_scorer.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List
3 | from operator import itemgetter
4 |
5 | from .base_scorer import BaseScorer
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | class AccuracyScorer(BaseScorer):
11 | """Accuracy Scorer."""
12 |
13 | def __init__(self, threshold: float = 0.5):
14 | self.__scores: List[float] = []
15 | self.threshold = threshold
16 |
17 | @property
18 | def scores(self):
19 | return self.__scores
20 |
21 | def check_denotation(self, out: list, ref: list) -> bool:
22 | return out == ref
23 |
24 | def add(self, out_items: List[dict], ref_items: List[dict]):
25 | """Add more items for computing corpus level scores.
26 |
27 | Args:
28 | out_items: outs from a single document (line)
29 | ref_items: reference of the evaluated document (line)
30 |
31 | """
32 | out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
33 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
34 | assert [a['key'] for a in out_ann] == [a['key'] for a in ref_ann]
35 |
36 | for out, ref in zip(out_ann, ref_ann):
37 | o_values = [v['value'] for v in out['values']]
38 | r_values = [v['value'] for v in ref['values']]
39 | score = int(self.check_denotation(o_values, r_values))
40 | self.__scores.append(score)
41 |
42 | def score(self) -> float:
43 | if self.__scores:
44 | return sum(self.__scores) / len(self.__scores)
45 | return 0.0
46 |
47 | @classmethod
48 | def support_feature_scores(cls) -> bool:
49 | return False
50 |
51 | @classmethod
52 | def metric_name(cls) -> str:
53 | return "Accuracy"
54 |
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/scorers/anls_scorer.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List
3 | from operator import itemgetter
4 |
5 | import textdistance
6 |
7 | from .base_scorer import BaseScorer
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | class AnlsScorer(BaseScorer):
13 |     """ANLS Scorer."""
14 |
15 | def __init__(self, threshold: float = 0.5):
16 | self.__scores: List[float] = []
17 | self.threshold = threshold
18 |
19 | @property
20 | def scores(self):
21 | return self.__scores
22 |
23 | def add(self, out_items: List[dict], ref_items: List[dict]):
24 | """Add more items for computing corpus level scores.
25 |
26 | Args:
27 | out_items: outs from a single document (line)
28 | ref_items: reference of the evaluated document (line)
29 |
30 | """
31 | out_ann = sorted(out_items['annotations'], key=itemgetter('key'))
32 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key'))
33 | assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann]
34 |
35 | """try:
36 | # assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann]
37 | out_keys = [a['key'][:100] for a in out_ann]
38 | ref_keys = [a['key'][:100] for a in ref_ann]
39 | # assert out_keys == ref_keys
40 | for i in range(len(out_keys)):
41 | try:
42 | assert out_keys[i] == ref_keys[i]
43 | except AssertionError as e:
44 | print(out_keys[i])
45 | print(ref_keys[i])
46 | print('==============')
47 | # exit(0)
48 |
49 | except AssertionError as e:
50 | print('key of pred and gt unmatched:')
51 | # print('pred:', out_keys)
52 | # print('gt:', ref_keys)
53 | exit(0)"""
54 |
55 | for out, ref in zip(out_ann, ref_ann):
56 | assert len(out['values']) == 1
57 | val = out['values'][0]['value']
58 | possible_vals = ref['values'][0]['value_variants']
59 | best_score = max([textdistance.levenshtein.normalized_similarity(val, pos)
60 | for pos in possible_vals])
61 | if 1 - self.threshold >= best_score:
62 | best_score = 0.0
63 | self.__scores.append(best_score)
64 |
65 | def score(self) -> float:
66 | if self.__scores:
67 | return sum(self.__scores) / len(self.__scores)
68 | return 0.0
69 |
70 | @classmethod
71 | def support_feature_scores(cls) -> bool:
72 | return False
73 |
74 | @classmethod
75 | def metric_name(cls) -> str:
76 | return "ANLS"
77 |
--------------------------------------------------------------------------------
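A minimal sketch of the record shape AnlsScorer.add expects, with field names taken from the code above: each document carries 'annotations' sharing the same keys, predictions hold exactly one value per key, and references list the acceptable 'value_variants'. Similarities at or below 1 - threshold are clamped to 0.

from pipeline.eval_utils.due_evaluator.scorers.anls_scorer import AnlsScorer

pred = {"annotations": [{"key": "total", "values": [{"value": "12.50"}]}]}
ref = {"annotations": [{"key": "total",
                        "values": [{"value": "12.50",
                                    "value_variants": ["12.50", "$12.50"]}]}]}

scorer = AnlsScorer(threshold=0.5)
scorer.add(pred, ref)
print(scorer.score())  # 1.0: the prediction exactly matches a value variant
--------------------------------------------------------------------------------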
/UReader/pipeline/eval_utils/due_evaluator/scorers/base_scorer.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import List
3 |
4 |
5 | class BaseScorer(abc.ABC):
6 | """Abstract class for scorers."""
7 |
8 | @abc.abstractmethod
9 | def add(self, out_items: List[dict], ref_items: List[dict]):
10 | pass
11 |
12 | @abc.abstractmethod
13 | def score(self):
14 | pass
15 |
16 | @abc.abstractclassmethod
17 | def support_feature_scores(cls) -> bool:
18 | pass
19 |
20 | @abc.abstractclassmethod
21 | def metric_name(cls) -> str:
22 | pass
23 |
--------------------------------------------------------------------------------
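New metrics plug into the evaluator by subclassing BaseScorer and implementing these four methods. The class below is a hypothetical exact-match scorer written only to illustrate the contract; it is not part of the repository.

from typing import List

from pipeline.eval_utils.due_evaluator.scorers.base_scorer import BaseScorer


class ExactMatchScorer(BaseScorer):
    """Hypothetical scorer: 1.0 when all values of a document match exactly."""

    def __init__(self):
        self._scores: List[float] = []

    def add(self, out_items: List[dict], ref_items: List[dict]):
        out_vals = [v["value"] for a in out_items["annotations"] for v in a["values"]]
        ref_vals = [v["value"] for a in ref_items["annotations"] for v in a["values"]]
        self._scores.append(float(out_vals == ref_vals))

    def score(self) -> float:
        return sum(self._scores) / len(self._scores) if self._scores else 0.0

    @classmethod
    def support_feature_scores(cls) -> bool:
        return False

    @classmethod
    def metric_name(cls) -> str:
        return "ExactMatch"
--------------------------------------------------------------------------------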
/UReader/pipeline/eval_utils/due_evaluator/scorers/geval_scorer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | import tempfile
3 | from collections import defaultdict
4 | import os
5 |
6 | from .fscorer import FScorer
7 | from .base_scorer import BaseScorer
8 |
9 |
10 | GEVAL_BINARY = os.getenv('GEVAL_BINARY', '/data/shared/bin/geval')
11 | GEVAL_METRIC = os.getenv('GEVAL_METRIC', 'MultiLabel-F1:cN')
12 |
13 |
14 | class GevalScorer(BaseScorer):
15 | def __init__(self):
16 | self.__ref = tempfile.NamedTemporaryFile('w+t')
17 | self.__out = tempfile.NamedTemporaryFile('w+t')
18 | self.__ref_data = defaultdict(set)
19 | self.__out_data = defaultdict(set)
20 |
21 | @staticmethod
22 | def add_to_geval_data(data, line):
23 | name = line['name']
24 | for annotation in line['annotations']:
25 | for idx, val in enumerate(annotation['values'], 1):
26 | for child in val['children']:
27 | new_name = child['key'] + '__' + str(idx) if '__' in child['key'] else child['key']
28 | if child['values'] and child['values'] != ['']:
29 | new_value = '|'.join([v['value'].replace(' ', '_') for v in child['values']])
30 | data[name].add(f'{new_name}={new_value}')
31 |
32 | def save_geval_files(self):
33 | for name in sorted(self.__ref_data.keys()):
34 | self.__ref.write(' '.join(self.__ref_data[name]) + '\n')
35 | self.__out.write(' '.join(self.__out_data[name]) + '\n')
36 |
37 | def add(self, out_items: List[str], ref_items: List[str]):
38 | self.add_to_geval_data(self.__out_data, out_items)
39 | self.add_to_geval_data(self.__ref_data, ref_items)
40 |
41 | def support_feature_scores(cls) -> bool:
42 | return False
43 |
44 | def metric_name(cls) -> str:
45 | return "GEVAL"
46 |
47 | def run_geval(self):
48 | self.__ref.flush()
49 | self.__out.flush()
50 | try:
51 | return float(os.popen(f'{GEVAL_BINARY} -o {self.__out.name} -e {self.__ref.name} --metric {GEVAL_METRIC}').read())
52 |         except Exception:
53 | return -1
54 |
55 | def score(self) -> float:
56 | self.save_geval_files()
57 | return self.run_geval()
58 |
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/scorers/mean_fscorer.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from .fscorer import FScorer
4 | from .base_scorer import BaseScorer
5 |
6 |
7 | class MeanFScorer(BaseScorer):
8 | def __init__(self):
9 | self.__scores: List[float] = []
10 |
11 | def add(self, out_items: List[str], ref_items: List[str]):
12 | fscorer = FScorer()
13 | fscorer.add(out_items, ref_items)
14 | self.__scores.append(fscorer.f_score())
15 |
16 | def support_feature_scores(cls) -> bool:
17 | return False
18 |
19 | def metric_name(cls) -> str:
20 | return "MEAN-F1"
21 |
22 | def score(self) -> float:
23 | if self.__scores:
24 | return sum(self.__scores) / len(self.__scores)
25 | return 0.0
26 |
--------------------------------------------------------------------------------
/UReader/pipeline/eval_utils/due_evaluator/utils.py:
--------------------------------------------------------------------------------
1 | from .scorers.fscorer import FScorer
2 | from typing import Dict, List, Optional, Sequence, Union
3 |
4 | import pandas as pd
5 |
6 | from .due_evaluator import DueEvaluator
7 |
8 |
9 | def dataframe_to_print(df: pd.DataFrame, print_format: Optional[str] = 'text') -> str:
10 | """Export dataframe to json or plain text.
11 |
12 | Args:
13 | df (pd.DataFrame): data
14 | print_format (str, optional): Print format. Defaults to 'text'.
15 |
16 | Raises:
17 | ValueError: unknown print_format
18 |
19 | Returns:
20 | str: printed version of dataframe
21 |
22 | """
23 | out: str
24 | if print_format == 'latex':
25 | out = df.reset_index().to_latex(index=False)
26 | elif print_format == 'text':
27 | out = df.reset_index().to_string(index=False)
28 | elif print_format == 'json':
29 | out = df.to_json(orient='index')
30 | else:
31 | raise ValueError()
32 | return out
33 |
34 |
35 | def property_scores_to_string(
36 | dues: List[DueEvaluator], print_format: str = 'text', columns: Sequence[str] = ('Precision', 'Recall', 'F-1'),
37 | ) -> str:
38 | """Print out scores per property.
39 |
40 | Args:
41 | dues: List of DueEvaluators
42 | print_format: output format: text or latex
43 | columns: a list of metrics to print
44 |
45 | Returns:
46 | str: string table with feature scores.
47 |
48 | """
49 | data = []
50 | for property_name in sorted(dues[0].property_scorers.keys()) + ['ALL']:
51 | row_data: Dict[str, Union[str, float]] = {}
52 | row_data['Label'] = property_name
53 | for due in dues:
54 | if len(dues) == 1:
55 | suffix = ''
56 | else:
57 | suffix = f' ({due.path})'
58 | if property_name == 'ALL':
59 | scorer = due.general_scorer
60 | else:
61 | scorer = due.property_scorers[property_name]
62 |
63 | row_data[scorer.metric_name() + suffix] = scorer.score()
64 | if isinstance(scorer, FScorer):
65 | if 'Precision' in columns:
66 | row_data['Precision' + suffix] = scorer.precision()
67 | if 'Recall' in columns:
68 | row_data['Recall' + suffix] = scorer.recall()
69 | data.append(row_data)
70 |
71 | df = pd.DataFrame(data)
72 | df.set_index('Label', drop=True, inplace=True)
73 |
74 | return dataframe_to_print(df, print_format)
75 |
--------------------------------------------------------------------------------
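A small sketch of dataframe_to_print on a toy score table, matching the frame that property_scores_to_string assembles (a Label index plus metric columns):

import pandas as pd

from pipeline.eval_utils.due_evaluator.utils import dataframe_to_print

df = pd.DataFrame([{"Label": "ALL", "F1": 0.82, "Precision": 0.85, "Recall": 0.79}])
df.set_index("Label", drop=True, inplace=True)

print(dataframe_to_print(df, "text"))   # plain-text table
print(dataframe_to_print(df, "json"))   # {"ALL":{"F1":0.82,"Precision":0.85,"Recall":0.79}}
--------------------------------------------------------------------------------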
/UReader/pipeline/eval_utils/run_evaluation.py:
--------------------------------------------------------------------------------
1 | from .tools import llm_answer_eval, postprocess_llm_vqa, textcaps_textvqa_eval
2 |
3 | if __name__ == '__main__':
4 |
5 | llm_answer_eval(metric_names=['RelaxedAccuracy'], result_path='evaluate_results/test_ChartQA.jsonl', save_each_eval=True)
6 | llm_answer_eval(metric_names=['ExactAccuracy'], result_path='evaluate_results/test_TabFact.jsonl', save_each_eval=True)
7 | llm_answer_eval(metric_names=['BLEU1', 'BLEU2', 'BLEU3', 'BLEU4', 'Meteor', 'RougeL', 'CIDEr'], result_path='evaluate_results/test_VisualMRC.jsonl', save_each_eval=True)
8 |
9 |
10 | postprocess_llm_vqa(dataset_name='DeepFormQA', split='test',
11 | llm_pred_path='./evaluate_results/test_DeepForm.jsonl',
12 | eval_flag=True)
13 | postprocess_llm_vqa(dataset_name='DocVQA', split='test',
14 | llm_pred_path='./evaluate_results/test_DocVQA.jsonl',
15 | eval_flag=True)
16 | postprocess_llm_vqa(dataset_name='InfographicsVQA', split='test',
17 | llm_pred_path='evaluate_results/test_InfographicsVQA.jsonl',
18 | eval_flag=True)
19 | postprocess_llm_vqa(dataset_name='KleisterCharityQA', split='test',
20 | llm_pred_path='evaluate_results/test_KleisterCharity.jsonl',
21 | eval_flag=True)
22 | postprocess_llm_vqa(dataset_name='WikiTableQuestions', split='test',
23 | llm_pred_path='evaluate_results/test_WikiTableQuestions.jsonl',
24 | eval_flag=True)
25 |
26 | # need to submit evaluate_results/***_official_eval.json
27 | textcaps_textvqa_eval(result_path='evaluate_results/test_TextCaps.jsonl', dataset='TextCaps', split='test')
28 | textcaps_textvqa_eval(result_path='evaluate_results/test_TextVQA.jsonl', dataset='TextVQA', split='test')
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/UReader/pipeline/interface.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import requests
4 | from PIL import Image
5 | from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
6 | from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
7 | from mplug_owl.processing_mplug_owl import MplugOwlImageProcessor, MplugOwlProcessor
8 | from sconf import Config
9 | from pipeline.data_utils.processors.builder import build_processors
10 |
11 |
12 | def get_model(pretrained_ckpt, use_bf16=False):
13 | """Model Provider with tokenizer and processor.
14 |
15 | Args:
16 | pretrained_ckpt (string): The path to pre-trained checkpoint.
17 | use_bf16 (bool, optional): Whether to use bfloat16 to load the model. Defaults to False.
18 |
19 | Returns:
20 | model: MplugOwl Model
21 | tokenizer: MplugOwl text tokenizer
22 | processor: MplugOwl processor (including text and image)
23 | """
24 | model = MplugOwlForConditionalGeneration.from_pretrained(
25 | pretrained_ckpt,
26 | torch_dtype=torch.bfloat16 if use_bf16 else torch.half,
27 | )
28 | config = Config('configs/sft/release.yaml')
29 | image_processor = build_processors(config['valid_processors'])['sft']
30 | tokenizer = MplugOwlTokenizer.from_pretrained(pretrained_ckpt)
31 | processor = MplugOwlProcessor(image_processor, tokenizer)
32 | return model, tokenizer, processor
33 |
34 |
35 | def do_generate(prompts, image_list, model, tokenizer, processor, use_bf16=False, **generate_kwargs):
36 | """The interface for generation
37 |
38 | Args:
39 | prompts (List[str]): The prompt text
40 | image_list (List[str]): Paths of images
41 | model (MplugOwlForConditionalGeneration): MplugOwlForConditionalGeneration
42 | tokenizer (MplugOwlTokenizer): MplugOwlTokenizer
43 | processor (MplugOwlProcessor): MplugOwlProcessor
44 | use_bf16 (bool, optional): Whether to use bfloat16. Defaults to False.
45 |
46 | Returns:
47 | sentence (str): Generated sentence.
48 | """
49 | if image_list:
50 | images = [Image.open(_) for _ in image_list]
51 | else:
52 | images = None
53 | inputs = processor(text=prompts, images=images, return_tensors='pt')
54 | inputs = {k: v.bfloat16() if v.dtype == torch.float else v for k, v in inputs.items()}
55 | inputs = {k: v.to(model.device) for k, v in inputs.items()}
56 | with torch.no_grad():
57 | res = model.generate(**inputs, **generate_kwargs)
58 | sentence = tokenizer.decode(res.tolist()[0], skip_special_tokens=True)
59 | return sentence
60 |
61 |
62 | if __name__ == '__main__':
63 | pass
64 |
--------------------------------------------------------------------------------
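An end-to-end sketch of get_model and do_generate above, run from the UReader directory so configs/sft/release.yaml resolves. The checkpoint path matches the one used in scripts/eval/eval_benchmark.sh; the prompt template and the generation arguments are illustrative (standard transformers generate() keywords), not a confirmed prompt format.

from pipeline.interface import get_model, do_generate

model, tokenizer, processor = get_model("./checkpoints/ureader", use_bf16=True)
model = model.to("cuda").eval()

prompts = ["Human: <image>\nHuman: What is the total shown in this table?\nAI:"]
images = ["examples/table.jpg"]

answer = do_generate(prompts, images, model, tokenizer, processor,
                     use_bf16=True, max_new_tokens=64, do_sample=False)
print(answer)
--------------------------------------------------------------------------------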
/UReader/pipeline/trainer.py:
--------------------------------------------------------------------------------
1 | import torch.distributed as dist
2 | import argparse
3 | from functools import partial
4 |
5 | import torch
6 |
7 | from torch.utils.data import DataLoader, Dataset
8 | from torch.utils.data.distributed import DistributedSampler
9 |
10 | from transformers import Trainer
11 |
12 | from pipeline.utils import batchify
13 |
14 |
15 | class CustomTrainer(Trainer):
16 |
17 | def get_train_dataloader(self) -> DataLoader:
18 | dataset = self.train_dataset
19 | sampler = DistributedSampler(dataset)
20 | return torch.utils.data.DataLoader(
21 | dataset, batch_size=self._train_batch_size,
22 | sampler=sampler,
23 | num_workers=self.args.dataloader_num_workers,
24 | drop_last=True,
25 | pin_memory=False,
26 | collate_fn=batchify)
27 |
28 |
29 | def get_eval_dataloader(self, eval_dataset: Dataset | None = None) -> DataLoader:
30 | dataset = self.eval_dataset
31 | sampler = DistributedSampler(dataset, shuffle=False)
32 | return torch.utils.data.DataLoader(
33 | dataset, batch_size=self._train_batch_size,
34 | sampler=sampler,
35 | num_workers=self.args.dataloader_num_workers,
36 | drop_last=True,
37 | pin_memory=False,
38 | collate_fn=batchify)
--------------------------------------------------------------------------------
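CustomTrainer only overrides the two dataloader hooks to use a DistributedSampler with the batchify collator. A hedged wiring sketch: model, train_ds and valid_ds are assumed to come from get_model and the dataset provider above, and torch.distributed must already be initialized (e.g. via torch.distributed.launch as in the training scripts), since DistributedSampler requires it.

from transformers import TrainingArguments

from pipeline.trainer import CustomTrainer

training_args = TrainingArguments(
    output_dir="./output/ureader",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    dataloader_num_workers=16,
    bf16=True,
)
trainer = CustomTrainer(
    model=model,                 # from get_model(...)
    args=training_args,
    train_dataset=train_ds,      # from train_valid_test_datasets_provider(...)
    eval_dataset=valid_ds,
)
trainer.train()
--------------------------------------------------------------------------------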
/UReader/scripts/eval/eval_benchmark.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=`pwd`
2 | python -m torch.distributed.launch --use_env \
3 | --nproc_per_node ${NPROC_PER_NODE:-8} \
4 | --nnodes ${WORLD_SIZE:-1} \
5 | --node_rank ${RANK:-0} \
6 | --master_addr ${MASTER_ADDR:-127.0.0.1} \
7 | --master_port ${MASTER_PORT:-12345} \
8 | pipeline/evaluation.py \
9 | --hf_model ./checkpoints/ureader
--------------------------------------------------------------------------------
/UReader/scripts/train_it.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For A100 80G
3 | DIR=`pwd`
4 | export PYTHONPATH=$DIR
5 | DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`
6 |
7 | if [ $MASTER_ADDR ];then
8 | echo $MASTER_ADDR
9 | echo $MASTER_PORT
10 | echo $WORLD_SIZE
11 | echo $RANK
12 | else
13 | MASTER_ADDR=127.0.0.1
14 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
15 | WORLD_SIZE=1
16 | RANK=0
17 | fi
18 |
19 | DISTRIBUTED_ARGS="--nproc_per_node 1 \
20 | --nnodes ${WORLD_SIZE} \
21 | --node_rank ${RANK} \
22 | --master_addr ${MASTER_ADDR} \
23 | --master_port ${MASTER_PORT}"
24 |
25 | EXP_NAME=ureader
26 |
27 | max_length=2048
28 | micro_batch_size=4
29 | global_batch_size=256
30 | gradient_accumulation_steps=1
31 |
32 | SAVE_NAME=${EXP_NAME}_${max_length}_${global_batch_size}
33 |
34 | SAVE_PATH="./output/${EXP_NAME}/"
35 | TENSORBOARD_PATH="./tensorboard/sft/${SAVE_NAME}/"
36 |
37 |
38 |
39 | # train_iters = total_data * train_epochs // global_batch_size
40 | train_epochs=10
41 | train_iters=25579
42 |
43 | lr_warmup_iters=50
44 |
45 | eval_iter=50
46 | eval_interval=50
47 | save_interval=500
48 |
49 | mkdir -p ${SAVE_PATH}
50 | mkdir -p ${TENSORBOARD_PATH}
51 |
52 | options=" \
53 | --pretrained-ckpt checkpoints/mplug-owl-llama-7b \
54 | --seq-length ${max_length} \
55 | --micro-batch-size ${micro_batch_size} \
56 | --global-batch-size ${global_batch_size} \
57 | --num-training-steps ${train_iters} \
58 | --train-epochs ${train_epochs} \
59 | --num-warmup-steps ${lr_warmup_iters} \
60 | --gradient-accumulation-steps ${gradient_accumulation_steps} \
61 | --lr 1e-4 \
62 | --min-lr 1e-6 \
63 | --eval-iters ${eval_iter} \
64 | --save-interval ${save_interval} \
65 | --save-path ${SAVE_PATH} \
66 | --tensorboard-dir ${TENSORBOARD_PATH} \
67 | --clip-grad 1.0 \
68 | --weight-decay 0.0001 \
69 | --adam-beta1 0.9 \
70 | --adam-beta2 0.999 \
71 | --num-workers 16 \
72 | --use-lora \
73 | --gradient-checkpointing \
74 | --bf16"
75 |
76 | multimodal_options=" \
77 | --mm-config configs/sft/release.yaml
78 | "
79 |
80 | python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log
--------------------------------------------------------------------------------
/UReader/scripts/train_it_v100.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For V100 32G
3 | DIR=`pwd`
4 | export PYTHONPATH=$DIR
5 | DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`
6 |
7 | if [ $MASTER_ADDR ];then
8 | echo $MASTER_ADDR
9 | echo $MASTER_PORT
10 | echo $WORLD_SIZE
11 | echo $RANK
12 | else
13 | MASTER_ADDR=127.0.0.1
14 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
15 | WORLD_SIZE=1
16 | RANK=0
17 | fi
18 |
19 | DISTRIBUTED_ARGS="--nproc_per_node 1 \
20 | --nnodes ${WORLD_SIZE} \
21 | --node_rank ${RANK} \
22 | --master_addr ${MASTER_ADDR} \
23 | --master_port ${MASTER_PORT}"
24 |
25 | EXP_NAME=ureader
26 |
27 | max_length=2048
28 | micro_batch_size=1
29 | global_batch_size=256
30 | gradient_accumulation_steps=1
31 |
32 | SAVE_NAME=${EXP_NAME}_${max_length}_${global_batch_size}
33 |
34 | SAVE_PATH="./output/${EXP_NAME}/"
35 | TENSORBOARD_PATH="./tensorboard/sft/${SAVE_NAME}/"
36 |
37 |
38 |
39 | # train_iters = total_data * train_epochs // global_batch_size
40 | train_epochs=10
41 | train_iters=25579
42 |
43 | lr_warmup_iters=50
44 |
45 | eval_iter=50
46 | eval_interval=50
47 | save_interval=500
48 |
49 | mkdir -p ${SAVE_PATH}
50 | mkdir -p ${TENSORBOARD_PATH}
51 |
52 | options=" \
53 | --pretrained-ckpt checkpoints/mplug-owl-llama-7b \
54 | --seq-length ${max_length} \
55 | --micro-batch-size ${micro_batch_size} \
56 | --global-batch-size ${global_batch_size} \
57 | --num-training-steps ${train_iters} \
58 | --train-epochs ${train_epochs} \
59 | --num-warmup-steps ${lr_warmup_iters} \
60 | --gradient-accumulation-steps ${gradient_accumulation_steps} \
61 | --lr 1e-4 \
62 | --min-lr 1e-6 \
63 | --eval-iters ${eval_iter} \
64 | --save-interval ${save_interval} \
65 | --save-path ${SAVE_PATH} \
66 | --tensorboard-dir ${TENSORBOARD_PATH} \
67 | --clip-grad 1.0 \
68 | --weight-decay 0.0001 \
69 | --adam-beta1 0.9 \
70 | --adam-beta2 0.999 \
71 | --num-workers 16 \
72 | --use-lora \
73 | --gradient-checkpointing \
74 | --fp16 \
75 | --deepspeed ds_config.json"
76 |
77 | multimodal_options=" \
78 | --mm-config configs/sft/release.yaml
79 | "
80 |
81 | python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log
--------------------------------------------------------------------------------
/UReader/serve/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/serve/__init__.py
--------------------------------------------------------------------------------
/UReader/serve/gradio_css.py:
--------------------------------------------------------------------------------
1 | code_highlight_css = (
2 | """
3 | #chatbot .hll { background-color: #ffffcc }
4 | #chatbot .c { color: #408080; font-style: italic }
5 | #chatbot .err { border: 1px solid #FF0000 }
6 | #chatbot .k { color: #008000; font-weight: bold }
7 | #chatbot .o { color: #666666 }
8 | #chatbot .ch { color: #408080; font-style: italic }
9 | #chatbot .cm { color: #408080; font-style: italic }
10 | #chatbot .cp { color: #BC7A00 }
11 | #chatbot .cpf { color: #408080; font-style: italic }
12 | #chatbot .c1 { color: #408080; font-style: italic }
13 | #chatbot .cs { color: #408080; font-style: italic }
14 | #chatbot .gd { color: #A00000 }
15 | #chatbot .ge { font-style: italic }
16 | #chatbot .gr { color: #FF0000 }
17 | #chatbot .gh { color: #000080; font-weight: bold }
18 | #chatbot .gi { color: #00A000 }
19 | #chatbot .go { color: #888888 }
20 | #chatbot .gp { color: #000080; font-weight: bold }
21 | #chatbot .gs { font-weight: bold }
22 | #chatbot .gu { color: #800080; font-weight: bold }
23 | #chatbot .gt { color: #0044DD }
24 | #chatbot .kc { color: #008000; font-weight: bold }
25 | #chatbot .kd { color: #008000; font-weight: bold }
26 | #chatbot .kn { color: #008000; font-weight: bold }
27 | #chatbot .kp { color: #008000 }
28 | #chatbot .kr { color: #008000; font-weight: bold }
29 | #chatbot .kt { color: #B00040 }
30 | #chatbot .m { color: #666666 }
31 | #chatbot .s { color: #BA2121 }
32 | #chatbot .na { color: #7D9029 }
33 | #chatbot .nb { color: #008000 }
34 | #chatbot .nc { color: #0000FF; font-weight: bold }
35 | #chatbot .no { color: #880000 }
36 | #chatbot .nd { color: #AA22FF }
37 | #chatbot .ni { color: #999999; font-weight: bold }
38 | #chatbot .ne { color: #D2413A; font-weight: bold }
39 | #chatbot .nf { color: #0000FF }
40 | #chatbot .nl { color: #A0A000 }
41 | #chatbot .nn { color: #0000FF; font-weight: bold }
42 | #chatbot .nt { color: #008000; font-weight: bold }
43 | #chatbot .nv { color: #19177C }
44 | #chatbot .ow { color: #AA22FF; font-weight: bold }
45 | #chatbot .w { color: #bbbbbb }
46 | #chatbot .mb { color: #666666 }
47 | #chatbot .mf { color: #666666 }
48 | #chatbot .mh { color: #666666 }
49 | #chatbot .mi { color: #666666 }
50 | #chatbot .mo { color: #666666 }
51 | #chatbot .sa { color: #BA2121 }
52 | #chatbot .sb { color: #BA2121 }
53 | #chatbot .sc { color: #BA2121 }
54 | #chatbot .dl { color: #BA2121 }
55 | #chatbot .sd { color: #BA2121; font-style: italic }
56 | #chatbot .s2 { color: #BA2121 }
57 | #chatbot .se { color: #BB6622; font-weight: bold }
58 | #chatbot .sh { color: #BA2121 }
59 | #chatbot .si { color: #BB6688; font-weight: bold }
60 | #chatbot .sx { color: #008000 }
61 | #chatbot .sr { color: #BB6688 }
62 | #chatbot .s1 { color: #BA2121 }
63 | #chatbot .ss { color: #19177C }
64 | #chatbot .bp { color: #008000 }
65 | #chatbot .fm { color: #0000FF }
66 | #chatbot .vc { color: #19177C }
67 | #chatbot .vg { color: #19177C }
68 | #chatbot .vi { color: #19177C }
69 | #chatbot .vm { color: #19177C }
70 | #chatbot .il { color: #666666 }
71 | """)
72 | #.highlight { background: #f8f8f8; }
73 |
74 |
--------------------------------------------------------------------------------
/UReader/serve/model_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import re
4 | import torch
5 | import transformers
6 | import traceback
7 |
8 | from queue import Queue
9 | from threading import Thread
10 |
11 |
12 | def post_process_output(text):
13 | text = text.strip()
14 | pattern = re.compile(
15 |         r"<unk>|<pad>|<s>|</s>|\[PAD\]|<\|endoftext\|>|\[UNK\]|\[CLS\]|\[MASK\]|<\|startofpiece\|>|<\|endofpiece\|>|\[gMASK\]|\[sMASK\]"
16 | )
17 | text = pattern.sub("", text.strip()).strip()
18 | return text
19 |
20 |
21 | def post_process_code(code):
22 | sep = "\n```"
23 | if sep in code:
24 | blocks = code.split(sep)
25 | if len(blocks) % 2 == 1:
26 | for i in range(1, len(blocks), 2):
27 | blocks[i] = blocks[i].replace("\\_", "_")
28 | code = sep.join(blocks)
29 | return code
30 |
31 |
32 | class Stream(transformers.StoppingCriteria):
33 | def __init__(self, callback_func=None):
34 | self.callback_func = callback_func
35 |
36 | def __call__(self, input_ids, scores) -> bool:
37 | if self.callback_func is not None:
38 | self.callback_func(input_ids[0])
39 | return False
40 |
41 |
42 | class Iteratorize:
43 |
44 | """
45 | Transforms a function that takes a callback
46 | into a lazy iterator (generator).
47 | """
48 |
49 | def __init__(self, func, kwargs={}, callback=None):
50 | self.mfunc = func
51 | self.c_callback = callback
52 | self.q = Queue()
53 | self.sentinel = object()
54 | self.kwargs = kwargs
55 | self.stop_now = False
56 |
57 | def _callback(val):
58 | if self.stop_now:
59 | raise ValueError
60 | self.q.put(val)
61 |
62 | def gentask():
63 | try:
64 | ret = self.mfunc(callback=_callback, **self.kwargs)
65 | except ValueError:
66 | pass
67 | except:
68 | traceback.print_exc()
69 | pass
70 |
71 | self.q.put(self.sentinel)
72 | if self.c_callback:
73 | self.c_callback(ret)
74 |
75 | self.thread = Thread(target=gentask)
76 | self.thread.start()
77 |
78 | def __iter__(self):
79 | return self
80 |
81 | def __next__(self):
82 | obj = self.q.get(True, None)
83 | if obj is self.sentinel:
84 | raise StopIteration
85 | else:
86 | return obj
87 |
88 | def __enter__(self):
89 | return self
90 |
91 | def __exit__(self, exc_type, exc_val, exc_tb):
92 | self.stop_now = True
--------------------------------------------------------------------------------
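Iteratorize turns a callback-based generation call into a lazy iterator by running it on a background thread and funneling each callback value through a queue; Stream is the StoppingCriteria that feeds each partial sequence into that callback. The sketch below uses a stand-in generate_with_callback instead of a real model.generate call, purely to illustrate the flow.

from serve.model_utils import Iteratorize


def generate_with_callback(callback=None, **kwargs):
    # Stand-in for model.generate(..., stopping_criteria=[Stream(callback_func=callback)]):
    # emit three growing lists of dummy token ids.
    for partial_ids in ([1, 2], [1, 2, 3], [1, 2, 3, 4]):
        callback(partial_ids)


with Iteratorize(generate_with_callback, kwargs={}) as stream:
    for partial_ids in stream:
        # In the real serving code each chunk would be decoded and cleaned with
        # tokenizer.decode(...) and post_process_output(...).
        print(partial_ids)
--------------------------------------------------------------------------------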
/assets/docowl1.5_chat_case.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/docowl1.5_chat_case.png
--------------------------------------------------------------------------------
/assets/docowl2_github_case.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/docowl2_github_case.jpg
--------------------------------------------------------------------------------
/assets/huggingface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/huggingface.png
--------------------------------------------------------------------------------
/assets/mPLUG_new1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/mPLUG_new1.png
--------------------------------------------------------------------------------
/assets/modelscope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/modelscope.png
--------------------------------------------------------------------------------