├── .gitignore ├── DocOwl ├── DocLLM │ ├── LLMDoc.jsonl │ ├── export_docowl.jsonl │ ├── export_minigpt4.jsonl │ ├── export_mplug_owl.jsonl │ └── images │ │ ├── chart_0.png │ │ ├── chart_13.png │ │ ├── chart_15.png │ │ ├── chart_16.png │ │ ├── chart_20.png │ │ ├── chart_26.png │ │ ├── chart_31.png │ │ ├── chart_32.png │ │ ├── chart_34.png │ │ ├── chart_35.png │ │ ├── chart_37.png │ │ ├── chart_41.png │ │ ├── chart_44.png │ │ ├── chart_46.png │ │ ├── chart_49.png │ │ ├── chart_57.png │ │ ├── chart_69.png │ │ ├── chart_76.png │ │ ├── chart_80.png │ │ ├── document_0.png │ │ ├── document_1.png │ │ ├── document_10.png │ │ ├── document_19.png │ │ ├── document_2.png │ │ ├── document_22.png │ │ ├── document_27.png │ │ ├── document_35.png │ │ ├── document_40.png │ │ ├── document_43.png │ │ ├── document_44.png │ │ ├── document_47.png │ │ ├── document_52.png │ │ ├── document_57.png │ │ ├── document_67.png │ │ ├── document_7.png │ │ ├── document_8.png │ │ ├── document_86.png │ │ ├── document_91.png │ │ ├── document_97.png │ │ ├── natural_1.png │ │ ├── natural_10.png │ │ ├── natural_14.png │ │ ├── natural_15.png │ │ ├── natural_21.png │ │ ├── natural_32.png │ │ ├── natural_34.png │ │ ├── natural_38.png │ │ ├── natural_42.png │ │ ├── natural_44.png │ │ ├── natural_5.png │ │ ├── natural_56.png │ │ ├── natural_69.png │ │ ├── natural_75.png │ │ ├── natural_77.png │ │ ├── natural_79.png │ │ ├── natural_8.png │ │ ├── natural_81.png │ │ ├── natural_90.png │ │ ├── natural_95.png │ │ ├── screenshot_1.png │ │ ├── screenshot_11.png │ │ ├── screenshot_25.png │ │ ├── screenshot_27.png │ │ ├── screenshot_28.png │ │ ├── screenshot_30.png │ │ ├── screenshot_31.png │ │ ├── screenshot_34.png │ │ ├── screenshot_4.png │ │ ├── screenshot_44.png │ │ ├── screenshot_47.png │ │ ├── screenshot_72.png │ │ ├── screenshot_77.png │ │ ├── screenshot_8.png │ │ ├── screenshot_83.png │ │ ├── screenshot_87.png │ │ ├── screenshot_91.png │ │ ├── screenshot_96.png │ │ ├── table_0.png │ │ ├── table_13.png │ │ ├── table_17.png │ │ ├── table_29.png │ │ ├── table_39.png │ │ ├── table_42.png │ │ ├── table_50.png │ │ ├── table_54.png │ │ ├── table_56.png │ │ ├── table_57.png │ │ ├── table_60.png │ │ ├── table_7.png │ │ ├── table_73.png │ │ ├── table_75.png │ │ ├── table_76.png │ │ ├── table_79.png │ │ ├── table_87.png │ │ ├── table_89.png │ │ ├── table_97.png │ │ └── table_98.png ├── README.md └── assets │ ├── -twitter-blue.svg │ ├── Demo-ModelScope-brightgreen.svg │ ├── LICENSE-Apache License-blue.svg │ ├── Paper-Arxiv-orange.svg │ ├── Paper-PDF-orange.svg │ ├── cases_git.jpg │ ├── mPLUG_new1.png │ ├── modelscopeIcon.svg │ └── overview.jpg ├── DocOwl1.5 ├── README.md ├── app.py ├── assets │ ├── Paper-Arxiv-orange.svg │ ├── doc_instruct.png │ ├── modelscope.png │ └── radar.png ├── docowl_benchmark_evaluate.py ├── docowl_doclocal4k_evaluate.py ├── docowl_infer.py ├── evaluation │ ├── benchmarks_eval.py │ ├── due_benchmarks_eval.py │ ├── due_evaluator │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── __version__.py │ │ ├── due_evaluator.py │ │ ├── py.typed │ │ ├── scorers │ │ │ ├── __init__.py │ │ │ ├── accuracy_scorer.py │ │ │ ├── anls_scorer.py │ │ │ ├── base_scorer.py │ │ │ ├── fscorer.py │ │ │ ├── geval_scorer.py │ │ │ ├── group_anls.py │ │ │ ├── mean_fscorer.py │ │ │ └── wtq_scorer.py │ │ └── utils.py │ └── evaluator.py ├── model_worker.py ├── mplug_docowl │ ├── __init__.py │ ├── constants.py │ ├── conversation.py │ ├── mm_utils.py │ ├── model │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── configuration_mplug_docowl.py │ │ ├── 
convert_mplug_docowl_weight_to_hf.py │ │ ├── convert_mplug_docowl_weight_to_hf_v2.py │ │ ├── modeling_attn_mask_utils.py │ │ ├── modeling_llama2.py │ │ ├── modeling_mplug_docowl.py │ │ ├── utils.py │ │ └── visual_encoder.py │ ├── processor.py │ ├── train │ │ ├── llama_flash_attn_monkey_patch.py │ │ ├── mplug_docowl_trainer.py │ │ └── train_docowl.py │ └── utils.py └── scripts │ ├── finetune_docowl.sh │ ├── finetune_docowl_lora.sh │ ├── zero2.json │ ├── zero3.json │ └── zero3_offload.json ├── DocOwl2 ├── README.md ├── assets │ ├── Paper-Arxiv-orange.svg │ ├── docowl2_effiency_and_case.jpg │ └── modelscope.png ├── docowl_benchmark_evaluate.py └── evaluation │ ├── benchmarks_eval.py │ ├── dude_eval.py │ ├── due_benchmarks_eval.py │ ├── due_evaluator │ ├── __init__.py │ ├── __main__.py │ ├── __version__.py │ ├── due_evaluator.py │ ├── py.typed │ ├── scorers │ │ ├── __init__.py │ │ ├── accuracy_scorer.py │ │ ├── anls_scorer.py │ │ ├── base_scorer.py │ │ ├── fscorer.py │ │ ├── geval_scorer.py │ │ ├── group_anls.py │ │ ├── mean_fscorer.py │ │ └── wtq_scorer.py │ └── utils.py │ ├── evaluator.py │ ├── mpdocvqa_eval.py │ └── newsvideoqa_eval.py ├── LICENSE ├── PaperOwl ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── assets │ ├── Paper-Arxiv-orange.svg │ ├── Paper-PDF-orange.svg │ ├── data_process.png │ ├── diagram_distribution.png │ ├── intro_case.jpeg │ └── paper_category.png ├── configs │ └── sft │ │ └── release.yaml ├── ds_config.json ├── mplug_owl │ ├── __init__.py │ ├── configuration_mplug_owl.py │ ├── modeling_mplug_owl.py │ ├── processing_mplug_owl.py │ └── tokenization_mplug_owl.py ├── pipeline │ ├── __init__.py │ ├── data_utils │ │ ├── __init__.py │ │ ├── processors │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── caption_processor.py │ │ │ ├── default_processor.py │ │ │ └── doc_processor.py │ │ ├── randaugment.py │ │ ├── registry.py │ │ └── xgpt3_dataset.py │ ├── eval_utils │ │ ├── __init__.py │ │ ├── due_evaluator │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── __version__.py │ │ │ ├── due_evaluator.py │ │ │ ├── py.typed │ │ │ ├── scorers │ │ │ │ ├── __init__.py │ │ │ │ ├── accuracy_scorer.py │ │ │ │ ├── anls_scorer.py │ │ │ │ ├── base_scorer.py │ │ │ │ ├── fscorer.py │ │ │ │ ├── geval_scorer.py │ │ │ │ ├── group_anls.py │ │ │ │ ├── mean_fscorer.py │ │ │ │ └── wtq_scorer.py │ │ │ └── utils.py │ │ ├── run_evaluation.py │ │ └── tools.py │ ├── evaluation.py │ ├── interface.py │ ├── train.py │ ├── trainer.py │ └── utils.py ├── scripts │ ├── train_it.sh │ └── train_it_v100.sh └── serve │ ├── __init__.py │ ├── conversation.py │ ├── gradio_css.py │ ├── gradio_patch.py │ ├── io_utils.py │ ├── model_utils.py │ ├── model_worker.py │ ├── serve_utils.py │ └── web_server.py ├── README.md ├── TinyChart ├── README.md ├── app.py ├── assets │ ├── Paper-Arxiv-orange.svg │ ├── cases.png │ └── perform_and_speed.png ├── images │ ├── albums.png │ ├── college.png │ ├── diseases.png │ ├── economy.png │ ├── immigrants.png │ ├── market.png │ ├── sails.png │ ├── sports.png │ └── workers.png ├── inference.ipynb ├── pyproject.toml ├── scripts │ ├── convert_model_config.py │ ├── evaluate.sh │ ├── merge_jsonl_sort.py │ ├── split_jsonl_dataset.py │ ├── train.sh │ ├── vit_add_tome.py │ └── zero3_offload_decay.json └── tinychart │ ├── __init__.py │ ├── arguments.py │ ├── constants.py │ ├── conversation.py │ ├── data │ ├── __init__.py │ ├── dataset.py │ ├── preprocess │ │ ├── __init__.py │ │ ├── default.py │ │ ├── phi.py │ │ └── v1.py │ └── process.py │ ├── eval │ ├── __init__.py │ ├── 
eval_chart2table.py │ ├── eval_chart2text.py │ ├── eval_metric.py │ ├── eval_model.py │ ├── run_eval.py │ └── run_tiny_chart.py │ ├── mm_utils.py │ ├── model │ ├── __init__.py │ ├── builder.py │ ├── language_model │ │ ├── __init__.py │ │ └── llava_phi.py │ ├── llava_arch.py │ ├── model_factory.py │ ├── multimodal_encoder │ │ ├── builder.py │ │ ├── merge.py │ │ └── siglip_encoder.py │ └── multimodal_projector │ │ └── builder.py │ ├── train │ ├── __init__.py │ ├── llava_trainer.py │ ├── train.py │ └── train_utils.py │ └── utils.py ├── UReader ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── app.py ├── assets │ ├── -twitter-blue.svg │ ├── Demo-ModelScope-brightgreen.svg │ ├── LICENSE-Apache License-blue.svg │ ├── Paper-Arxiv-orange.svg │ ├── Paper-PDF-orange.svg │ ├── intro_case.jpg │ ├── model.png │ ├── modelscope.png │ └── modelscopeIcon.svg ├── configs │ └── sft │ │ └── release.yaml ├── ds_config.json ├── examples │ ├── Yao_Ming.jpeg │ ├── ca.jpeg │ ├── docowl.jpg │ ├── fridge.jpg │ ├── fruits.jpg │ ├── laundry.jpeg │ ├── monalisa-fun.jpg │ ├── monday.jpg │ ├── mug_ad.jpeg │ ├── owl.jpg │ ├── rap.jpeg │ ├── table.jpg │ ├── titanic.jpeg │ ├── vga.jpeg │ └── website.jpg ├── mplug_owl │ ├── __init__.py │ ├── configuration_mplug_owl.py │ ├── modeling_mplug_owl.py │ ├── processing_mplug_owl.py │ └── tokenization_mplug_owl.py ├── pipeline │ ├── __init__.py │ ├── data_utils │ │ ├── __init__.py │ │ ├── processors │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── caption_processor.py │ │ │ ├── default_processor.py │ │ │ └── doc_processor.py │ │ ├── randaugment.py │ │ ├── registry.py │ │ └── xgpt3_dataset.py │ ├── eval_utils │ │ ├── __init__.py │ │ ├── due_evaluator │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── __version__.py │ │ │ ├── due_evaluator.py │ │ │ ├── py.typed │ │ │ ├── scorers │ │ │ │ ├── __init__.py │ │ │ │ ├── accuracy_scorer.py │ │ │ │ ├── anls_scorer.py │ │ │ │ ├── base_scorer.py │ │ │ │ ├── fscorer.py │ │ │ │ ├── geval_scorer.py │ │ │ │ ├── group_anls.py │ │ │ │ ├── mean_fscorer.py │ │ │ │ └── wtq_scorer.py │ │ │ └── utils.py │ │ ├── run_evaluation.py │ │ └── tools.py │ ├── evaluation.py │ ├── interface.py │ ├── train.py │ ├── trainer.py │ └── utils.py ├── scripts │ ├── eval │ │ └── eval_benchmark.sh │ ├── train_it.sh │ └── train_it_v100.sh └── serve │ ├── __init__.py │ ├── conversation.py │ ├── gradio_css.py │ ├── gradio_patch.py │ ├── io_utils.py │ ├── model_utils.py │ ├── model_worker.py │ ├── serve_utils.py │ └── web_server.py └── assets ├── docowl1.5_chat_case.png ├── docowl2_github_case.jpg ├── huggingface.png ├── mPLUG_new1.png └── modelscope.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | #IDE 7 | .idea/ 8 | .DS_Store 9 | 10 | #SOFA 11 | SOFA/build 12 | SOFA/sofa.egg-info 13 | SOFA/dist 14 | 15 | #Megatron 16 | megatron/fused_kernels/build/ 17 | 18 | *.ipynb 19 | *.pth 20 | *.pt 21 | *.tar 22 | *.out 23 | *.log 24 | *.txt 25 | tensorboard/ 26 | save_model/ 27 | 28 | debug/ 29 | language_evaluation 30 | evalcap 31 | .ipynb_checkpoints 32 | 33 | core-* 34 | .nfs* 35 | -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_0.png 
-------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_13.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_15.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_16.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_20.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_26.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_31.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_32.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_34.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_34.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_35.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_35.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_37.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_37.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_41.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_41.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_44.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_46.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_46.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_49.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_49.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_57.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_69.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_69.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_76.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/chart_80.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/chart_80.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_0.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_1.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_10.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_19.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_19.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_2.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_22.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_27.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_35.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_35.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_40.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_43.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_43.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_44.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_47.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_47.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_52.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_52.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_57.png 
-------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_67.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_67.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_7.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_8.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_86.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_86.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_91.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_91.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/document_97.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/document_97.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_1.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_10.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_14.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_15.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_21.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_21.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_32.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_34.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_34.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_38.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_38.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_42.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_44.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_5.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_56.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_56.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_69.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_69.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_75.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_75.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_77.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_77.png -------------------------------------------------------------------------------- 
/DocOwl/DocLLM/images/natural_79.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_79.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_8.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_81.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_81.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_90.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_90.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/natural_95.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/natural_95.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_1.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_11.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_25.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_27.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_28.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_30.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_30.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_31.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_34.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_34.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_4.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_44.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_47.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_47.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_72.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_77.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_77.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_8.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_83.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_83.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_87.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_87.png 
-------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_91.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_91.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/screenshot_96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/screenshot_96.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_0.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_13.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_17.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_29.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_29.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_39.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_42.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_50.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_54.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_54.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_56.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_56.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_57.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_60.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_7.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_73.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_73.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_75.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_75.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_76.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_79.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_79.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_87.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_87.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_89.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_89.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_97.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_97.png -------------------------------------------------------------------------------- /DocOwl/DocLLM/images/table_98.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/DocLLM/images/table_98.png -------------------------------------------------------------------------------- /DocOwl/assets/-twitter-blue.svg: -------------------------------------------------------------------------------- 1 | twittertwitter -------------------------------------------------------------------------------- /DocOwl/assets/Demo-ModelScope-brightgreen.svg: -------------------------------------------------------------------------------- 1 | Demo: ModelScopeDemoModelScope -------------------------------------------------------------------------------- /DocOwl/assets/LICENSE-Apache License-blue.svg: -------------------------------------------------------------------------------- 1 | LICENSE: Apache LicenseLICENSEApache License -------------------------------------------------------------------------------- /DocOwl/assets/Paper-Arxiv-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: ArxivPaperArxiv -------------------------------------------------------------------------------- /DocOwl/assets/Paper-PDF-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: PDFPaperPDF -------------------------------------------------------------------------------- /DocOwl/assets/cases_git.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/assets/cases_git.jpg -------------------------------------------------------------------------------- /DocOwl/assets/mPLUG_new1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/assets/mPLUG_new1.png -------------------------------------------------------------------------------- /DocOwl/assets/modelscopeIcon.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DocOwl/assets/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl/assets/overview.jpg -------------------------------------------------------------------------------- /DocOwl1.5/assets/Paper-Arxiv-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: ArxivPaperArxiv -------------------------------------------------------------------------------- /DocOwl1.5/assets/doc_instruct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl1.5/assets/doc_instruct.png -------------------------------------------------------------------------------- /DocOwl1.5/assets/modelscope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl1.5/assets/modelscope.png -------------------------------------------------------------------------------- /DocOwl1.5/assets/radar.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl1.5/assets/radar.png -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from .__main__ import cli_main 2 | from .due_evaluator import DueEvaluator 3 | 4 | __all__ = ['DueEvaluator', 'cli_main'] 5 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import sys 6 | from typing import Optional, Set 7 | import json 8 | 9 | from due_evaluator.due_evaluator import DueEvaluator 10 | from due_evaluator.utils import property_scores_to_string 11 | 12 | 13 | def parse_args(): 14 | """Parse CLI arguments. 15 | 16 | Returns: 17 | namespace: namespace with parsed variables. 18 | 19 | """ 20 | parser = argparse.ArgumentParser('Document Understanding Evaluator') 21 | parser.add_argument( 22 | '--out-files', 23 | '-o', 24 | type=argparse.FileType('r', encoding='utf-8'), 25 | required=True, 26 | nargs='+', 27 | help='Out file to evaluate', 28 | ) 29 | parser.add_argument( 30 | '--reference', '-r', type=argparse.FileType('r', encoding='utf-8'), required=True, help='Reference file', 31 | ) 32 | parser.add_argument('--metric', '-m', type=str, default='F1', choices=['F1', 'MEAN-F1', 'ANLS', 'WTQ', 'GROUP-ANLS']) 33 | parser.add_argument( 34 | '--return-score', 35 | default='F1', 36 | choices=['F1', 'mean-F1', 'ANLS', 'mean-Precision', 'mean-Recall', 'WTQ'], 37 | help='Return WR-like mean-F1 score', 38 | ) 39 | parser.add_argument('--line-by-line', action='store_true', default=False, help='Return per-example results') 40 | parser.add_argument( 41 | '--columns', type=str, nargs='+', default=['Precision', 'Recall', 'F1'], help='Columns', 42 | ) 43 | parser.add_argument( 44 | '--print-format', 45 | default='text', 46 | type=str, 47 | choices=['text', 'latex', 'json'], 48 | help='Print feature table in the given format', 49 | ) 50 | parser.add_argument('--properties', nargs='+', type=str, help='Property set to be limited to') 51 | parser.add_argument( 52 | '--ignore-case', '-i', action='store_true', default=False, help='Ignore letter case when comparing values', 53 | ) 54 | return parser.parse_args() 55 | 56 | 57 | def cli_main(args: argparse.Namespace): 58 | """CLI main.
59 | 60 | Args: 61 | args: cli arguments 62 | """ 63 | reference = [json.loads(line) for line in args.reference] 64 | 65 | evaluators = [] 66 | for out_file in args.out_files: 67 | predictions = [json.loads(line) for line in out_file] 68 | 69 | property_set: Optional[Set[str]] 70 | if args.properties: 71 | property_set = args.properties 72 | else: 73 | property_set = None 74 | 75 | evaluators.append( 76 | DueEvaluator(reference, predictions, property_set, args.ignore_case, out_file.name, args.metric) 77 | ) 78 | 79 | prop_str = property_scores_to_string(evaluators, args.print_format, args.columns) 80 | if args.print_format != 'json': 81 | print(prop_str, file=sys.stderr) 82 | 83 | if args.line_by_line: 84 | for idx, score in enumerate(evaluators[0].line_by_line()): 85 | print(f'{idx}: {score}', file=sys.stderr) 86 | return prop_str 87 | 88 | 89 | def main() -> None: 90 | """Main.""" 91 | args = parse_args() 92 | cli_main(args) 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/__version__.py: -------------------------------------------------------------------------------- 1 | """Version specification.""" 2 | 3 | VERSION = (0, 0, 8) 4 | __version__ = '.'.join(map(str, VERSION)) 5 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl1.5/evaluation/due_evaluator/py.typed -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/scorers/__init__.py: -------------------------------------------------------------------------------- 1 | from .anls_scorer import AnlsScorer 2 | from .base_scorer import BaseScorer 3 | from .fscorer import FScorer 4 | from .mean_fscorer import MeanFScorer 5 | from .wtq_scorer import WtqScorer 6 | from .group_anls import GroupAnlsScorer 7 | from .geval_scorer import GevalScorer 8 | 9 | __all__ = ['AnlsScorer', 'BaseScorer', 'FScorer', 'MeanFScorer', 'WtqScorer', 'GevalScorer', 'GroupAnlsScorer'] 10 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/scorers/accuracy_scorer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | from operator import itemgetter 4 | 5 | from .base_scorer import BaseScorer 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class AccuracyScorer(BaseScorer): 11 | """Accuracy Scorer.""" 12 | 13 | def __init__(self, threshold: float = 0.5): 14 | self.__scores: List[float] = [] 15 | self.threshold = threshold 16 | 17 | @property 18 | def scores(self): 19 | return self.__scores 20 | 21 | def check_denotation(self, out: list, ref: list) -> bool: 22 | return out == ref 23 | 24 | def add(self, out_items: List[dict], ref_items: List[dict]): 25 | """Add more items for computing corpus level scores. 
26 | 27 | Args: 28 | out_items: outs from a single document (line) 29 | ref_items: reference of the evaluated document (line) 30 | 31 | """ 32 | out_ann = sorted(out_items['annotations'], key=itemgetter('key')) 33 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key')) 34 | assert [a['key'] for a in out_ann] == [a['key'] for a in ref_ann] 35 | 36 | for out, ref in zip(out_ann, ref_ann): 37 | o_values = [v['value'] for v in out['values']] 38 | r_values = [v['value'] for v in ref['values']] 39 | score = int(self.check_denotation(o_values, r_values)) 40 | self.__scores.append(score) 41 | 42 | def score(self) -> float: 43 | if self.__scores: 44 | return sum(self.__scores) / len(self.__scores) 45 | return 0.0 46 | 47 | @classmethod 48 | def support_feature_scores(cls) -> bool: 49 | return False 50 | 51 | @classmethod 52 | def metric_name(cls) -> str: 53 | return "Accuracy" 54 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/scorers/anls_scorer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | from operator import itemgetter 4 | 5 | import textdistance 6 | 7 | from due_evaluator.scorers.base_scorer import BaseScorer 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class AnlsScorer(BaseScorer): 13 | """ANLS Scorer.""" 14 | 15 | def __init__(self, threshold: float = 0.5): 16 | self.__scores: List[float] = [] 17 | self.threshold = threshold 18 | 19 | @property 20 | def scores(self): 21 | return self.__scores 22 | 23 | def add(self, out_items: List[dict], ref_items: List[dict]): 24 | """Add more items for computing corpus level scores. 25 | 26 | Args: 27 | out_items: outs from a single document (line) 28 | ref_items: reference of the evaluated document (line) 29 | 30 | """ 31 | out_ann = sorted(out_items['annotations'], key=itemgetter('key')) 32 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key')) 33 | assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann] 34 | 35 | """try: 36 | # assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann] 37 | out_keys = [a['key'][:100] for a in out_ann] 38 | ref_keys = [a['key'][:100] for a in ref_ann] 39 | # assert out_keys == ref_keys 40 | for i in range(len(out_keys)): 41 | try: 42 | assert out_keys[i] == ref_keys[i] 43 | except AssertionError as e: 44 | print(out_keys[i]) 45 | print(ref_keys[i]) 46 | print('==============') 47 | # exit(0) 48 | 49 | except AssertionError as e: 50 | print('key of pred and gt unmatched:') 51 | # print('pred:', out_keys) 52 | # print('gt:', ref_keys) 53 | exit(0)""" 54 | 55 | for out, ref in zip(out_ann, ref_ann): 56 | assert len(out['values']) == 1 57 | val = out['values'][0]['value'] 58 | possible_vals = ref['values'][0]['value_variants'] 59 | best_score = max([textdistance.levenshtein.normalized_similarity(val, pos) 60 | for pos in possible_vals]) 61 | if 1 - self.threshold >= best_score: 62 | best_score = 0.0 63 | self.__scores.append(best_score) 64 | 65 | def score(self) -> float: 66 | if self.__scores: 67 | return sum(self.__scores) / len(self.__scores) 68 | return 0.0 69 | 70 | @classmethod 71 | def support_feature_scores(cls) -> bool: 72 | return False 73 | 74 | @classmethod 75 | def metric_name(cls) -> str: 76 | return "ANLS" 77 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/scorers/base_scorer.py:
-------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | 5 | class BaseScorer(abc.ABC): 6 | """Abstract class for scorers.""" 7 | 8 | @abc.abstractmethod 9 | def add(self, out_items: List[dict], ref_items: List[dict]): 10 | pass 11 | 12 | @abc.abstractmethod 13 | def score(self): 14 | pass 15 | 16 | @abc.abstractclassmethod 17 | def support_feature_scores(cls) -> bool: 18 | pass 19 | 20 | @abc.abstractclassmethod 21 | def metric_name(cls) -> str: 22 | pass 23 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/scorers/geval_scorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import tempfile 3 | from collections import defaultdict 4 | import os 5 | 6 | from due_evaluator.scorers.fscorer import FScorer 7 | from due_evaluator.scorers.base_scorer import BaseScorer 8 | 9 | 10 | GEVAL_BINARY = os.getenv('GEVAL_BINARY', '/data/shared/bin/geval') 11 | GEVAL_METRIC = os.getenv('GEVAL_METRIC', 'MultiLabel-F1:cN') 12 | 13 | 14 | class GevalScorer(BaseScorer): 15 | def __init__(self): 16 | self.__ref = tempfile.NamedTemporaryFile('w+t') 17 | self.__out = tempfile.NamedTemporaryFile('w+t') 18 | self.__ref_data = defaultdict(set) 19 | self.__out_data = defaultdict(set) 20 | 21 | @staticmethod 22 | def add_to_geval_data(data, line): 23 | name = line['name'] 24 | for annotation in line['annotations']: 25 | for idx, val in enumerate(annotation['values'], 1): 26 | for child in val['children']: 27 | new_name = child['key'] + '__' + str(idx) if '__' in child['key'] else child['key'] 28 | if child['values'] and child['values'] != ['']: 29 | new_value = '|'.join([v['value'].replace(' ', '_') for v in child['values']]) 30 | data[name].add(f'{new_name}={new_value}') 31 | 32 | def save_geval_files(self): 33 | for name in sorted(self.__ref_data.keys()): 34 | self.__ref.write(' '.join(self.__ref_data[name]) + '\n') 35 | self.__out.write(' '.join(self.__out_data[name]) + '\n') 36 | 37 | def add(self, out_items: List[str], ref_items: List[str]): 38 | self.add_to_geval_data(self.__out_data, out_items) 39 | self.add_to_geval_data(self.__ref_data, ref_items) 40 | 41 | def support_feature_scores(cls) -> bool: 42 | return False 43 | 44 | def metric_name(cls) -> str: 45 | return "GEVAL" 46 | 47 | def run_geval(self): 48 | self.__ref.flush() 49 | self.__out.flush() 50 | try: 51 | return float(os.popen(f'{GEVAL_BINARY} -o {self.__out.name} -e {self.__ref.name} --metric {GEVAL_METRIC}').read()) 52 | except: 53 | return -1 54 | 55 | def score(self) -> float: 56 | self.save_geval_files() 57 | return self.run_geval() 58 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/scorers/mean_fscorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from due_evaluator.scorers.fscorer import FScorer 4 | from due_evaluator.scorers.base_scorer import BaseScorer 5 | 6 | 7 | class MeanFScorer(BaseScorer): 8 | def __init__(self): 9 | self.__scores: List[float] = [] 10 | 11 | def add(self, out_items: List[str], ref_items: List[str]): 12 | fscorer = FScorer() 13 | fscorer.add(out_items, ref_items) 14 | self.__scores.append(fscorer.f_score()) 15 | 16 | def support_feature_scores(cls) -> bool: 17 | return False 18 | 19 | def metric_name(cls) -> str: 20 | return "MEAN-F1" 21 | 22 | def score(self) -> 
float: 23 | if self.__scores: 24 | return sum(self.__scores) / len(self.__scores) 25 | return 0.0 26 | -------------------------------------------------------------------------------- /DocOwl1.5/evaluation/due_evaluator/utils.py: -------------------------------------------------------------------------------- 1 | from due_evaluator.scorers.fscorer import FScorer 2 | from typing import Dict, List, Optional, Sequence, Union 3 | 4 | import pandas as pd 5 | 6 | from due_evaluator.due_evaluator import DueEvaluator 7 | 8 | 9 | def dataframe_to_print(df: pd.DataFrame, print_format: Optional[str] = 'text') -> str: 10 | """Export dataframe to json or plain text. 11 | 12 | Args: 13 | df (pd.DataFrame): data 14 | print_format (str, optional): Print format. Defaults to 'text'. 15 | 16 | Raises: 17 | ValueError: unknown print_format 18 | 19 | Returns: 20 | str: printed version of dataframe 21 | 22 | """ 23 | out: str 24 | if print_format == 'latex': 25 | out = df.reset_index().to_latex(index=False) 26 | elif print_format == 'text': 27 | out = df.reset_index().to_string(index=False) 28 | elif print_format == 'json': 29 | out = df.to_json(orient='index') 30 | else: 31 | raise ValueError() 32 | return out 33 | 34 | 35 | def property_scores_to_string( 36 | dues: List[DueEvaluator], print_format: str = 'text', columns: Sequence[str] = ('Precision', 'Recall', 'F-1'), 37 | ) -> str: 38 | """Print out scores per property. 39 | 40 | Args: 41 | dues: List of DueEvaluators 42 | print_format: output format: text or latex 43 | columns: a list of metrics to print 44 | 45 | Returns: 46 | str: string table with feature scores. 47 | 48 | """ 49 | data = [] 50 | for property_name in sorted(dues[0].property_scorers.keys()) + ['ALL']: 51 | row_data: Dict[str, Union[str, float]] = {} 52 | row_data['Label'] = property_name 53 | for due in dues: 54 | if len(dues) == 1: 55 | suffix = '' 56 | else: 57 | suffix = f' ({due.path})' 58 | if property_name == 'ALL': 59 | scorer = due.general_scorer 60 | else: 61 | scorer = due.property_scorers[property_name] 62 | 63 | row_data[scorer.metric_name() + suffix] = scorer.score() 64 | if isinstance(scorer, FScorer): 65 | if 'Precision' in columns: 66 | row_data['Precision' + suffix] = scorer.precision() 67 | if 'Recall' in columns: 68 | row_data['Recall' + suffix] = scorer.recall() 69 | data.append(row_data) 70 | 71 | df = pd.DataFrame(data) 72 | df.set_index('Label', drop=True, inplace=True) 73 | 74 | return dataframe_to_print(df, print_format) 75 | -------------------------------------------------------------------------------- /DocOwl1.5/mplug_docowl/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import MPLUGDocOwlLlamaForCausalLM 2 | from .processor import DocProcessor -------------------------------------------------------------------------------- /DocOwl1.5/mplug_docowl/constants.py: -------------------------------------------------------------------------------- 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 2 | WORKER_HEART_BEAT_INTERVAL = 15 3 | 4 | LOGDIR = "./demo_logs" 5 | 6 | # Model Constants 7 | IGNORE_INDEX = -100 8 | IMAGE_TOKEN_INDEX = -200 9 | DEFAULT_IMAGE_TOKEN = "<|image|>" 10 | -------------------------------------------------------------------------------- /DocOwl1.5/mplug_docowl/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling_mplug_docowl import MPLUGDocOwlLlamaForCausalLM 2 | from .configuration_mplug_docowl import MPLUGDocOwlConfig 
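For readers new to the DUE evaluation code collected above, here is a minimal usage sketch of the scorer API. It is an illustration only: the document name, question, and answers are hypothetical, and it assumes the DocOwl1.5/evaluation directory is on sys.path (so that the due_evaluator package resolves) and that the textdistance dependency is installed. The nested annotations/values layout mirrors exactly what AccuracyScorer.add and AnlsScorer.add read in the sources shown earlier.

from due_evaluator.scorers.accuracy_scorer import AccuracyScorer
from due_evaluator.scorers.anls_scorer import AnlsScorer

# One prediction record and one reference record for a single document (hypothetical data).
prediction = {
    "name": "doc_0",
    "annotations": [
        {"key": "what is the title?", "values": [{"value": "Annual Report 2020"}]},
    ],
}
reference = {
    "name": "doc_0",
    "annotations": [
        # AnlsScorer reads 'value_variants' from the reference; AccuracyScorer only compares the 'value' lists.
        {
            "key": "what is the title?",
            "values": [{"value": "Annual Report 2020", "value_variants": ["Annual Report 2020", "annual report 2020"]}],
        },
    ],
}

accuracy = AccuracyScorer()
accuracy.add(prediction, reference)   # exact-match denotation check on key-sorted annotations
print(accuracy.metric_name(), accuracy.score())

anls = AnlsScorer(threshold=0.5)
anls.add(prediction, reference)       # normalized Levenshtein similarity, zeroed at or below 1 - threshold
print(anls.metric_name(), anls.score())

Note that although add is annotated as taking List[dict], the method bodies index out_items['annotations'] directly, so each call receives a single document record (one prediction line and its matching reference line) rather than a list of records.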
-------------------------------------------------------------------------------- /DocOwl1.5/mplug_docowl/model/utils.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig 2 | 3 | 4 | def auto_upgrade(config): 5 | cfg = AutoConfig.from_pretrained(config) 6 | if 'mplug_owl2' in config and 'mplug_owl2' not in cfg.model_type: 7 | assert cfg.model_type == 'mplug_owl2' 8 | print("You are using newer LLaVA code base, while the checkpoint of v0 is from older code base.") 9 | print("You must upgrade the checkpoint to the new code base (this can be done automatically).") 10 | confirm = input("Please confirm that you want to upgrade the checkpoint. [Y/N]") 11 | if confirm.lower() in ["y", "yes"]: 12 | print("Upgrading checkpoint...") 13 | assert len(cfg.architectures) == 1 14 | setattr(cfg.__class__, "model_type", "mplug_owl2") 15 | cfg.architectures[0] = 'LlavaLlamaForCausalLM' 16 | cfg.save_pretrained(config) 17 | print("Checkpoint upgraded.") 18 | else: 19 | print("Checkpoint upgrade aborted.") 20 | exit(1) -------------------------------------------------------------------------------- /DocOwl1.5/scripts/finetune_docowl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ $MASTER_ADDR ];then 3 | echo $MASTER_ADDR 4 | echo $MASTER_PORT 5 | echo $WORLD_SIZE 6 | echo $RANK 7 | else 8 | MASTER_ADDR=127.0.0.1 9 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15 10 | WORLD_SIZE=1 11 | RANK=0 12 | fi 13 | # Change for multinode config 14 | NNODES=${WORLD_SIZE} 15 | NODE_RANK=${RANK} 16 | GPUS_PER_NODE=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) 17 | # GPUS_PER_NODE=1 18 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 19 | echo $DISTRIBUTED_ARGS 20 | 21 | # change LOAD to your local path of DocOwl1.5-stage1 22 | LOAD='./mPLUG/DocOwl1.5-stage1' 23 | 24 | # batch size = per_device_train_batch_size x GPUS_PER_NODE x NNODES x gradient_accumulation_steps 25 | DATA_FILE=./DocDownstream-1.0/train.jsonl 26 | torchrun $DISTRIBUTED_ARGS mplug_docowl/train/train_docowl.py \ 27 | --deepspeed ./scripts/zero2.json \ 28 | --model_name_or_path $LOAD \ 29 | --version v1 \ 30 | --data_path $DATA_FILE \ 31 | --image_folder './DocDownstream-1.0/' \ 32 | --image_size 448 \ 33 | --crop_anchors 'grid_9' \ 34 | --add_global_img True \ 35 | --add_textual_crop_indicator True \ 36 | --bf16 True \ 37 | --output_dir ./checkpoints/docowl1.5 \ 38 | --num_train_epochs 3 \ 39 | --per_device_train_batch_size 1 \ 40 | --per_device_eval_batch_size 1 \ 41 | --gradient_accumulation_steps 8 \ 42 | --evaluation_strategy "no" \ 43 | --save_strategy "steps" \ 44 | --save_steps 500 \ 45 | --save_total_limit 4 \ 46 | --learning_rate 2e-5 \ 47 | --weight_decay 0. 
\ 48 | --warmup_ratio 0.03 \ 49 | --lr_scheduler_type "cosine" \ 50 | --logging_steps 1 \ 51 | --tf32 True \ 52 | --model_max_length 3600 \ 53 | --gradient_checkpointing True \ 54 | --tune_vision2text True \ 55 | --freeze_vision_model True \ 56 | --freeze_backbone False \ 57 | --dataloader_num_workers 4 \ 58 | --lazy_preprocess True \ 59 | --report_to tensorboard -------------------------------------------------------------------------------- /DocOwl1.5/scripts/finetune_docowl_lora.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ $MASTER_ADDR ];then 3 | echo $MASTER_ADDR 4 | echo $MASTER_PORT 5 | echo $WORLD_SIZE 6 | echo $RANK 7 | else 8 | MASTER_ADDR=127.0.0.1 9 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15 10 | WORLD_SIZE=1 11 | RANK=0 12 | fi 13 | # Change for multinode config 14 | NNODES=${WORLD_SIZE} 15 | NODE_RANK=${RANK} 16 | GPUS_PER_NODE=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) 17 | # GPUS_PER_NODE=1 18 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 19 | echo $DISTRIBUTED_ARGS 20 | 21 | # change LOAD to your local path of DocOwl1.5-stage1 22 | LOAD='./mPLUG/DocOwl1.5-stage1' 23 | 24 | # batch size = per_device_train_batch_size x GPUS_PER_NODE x NNODES x gradient_accumulation_steps 25 | DATA_FILE=./DocDownstream-1.0/train.jsonl 26 | torchrun $DISTRIBUTED_ARGS mplug_docowl/train/train_docowl.py \ 27 | --lora_enable True --lora_r 128 --lora_alpha 256 --vision2text_lr 2e-5 \ 28 | --deepspeed ./scripts/zero2.json \ 29 | --model_name_or_path $LOAD \ 30 | --version v1 \ 31 | --data_path $DATA_FILE \ 32 | --image_folder './DocDownstream-1.0/' \ 33 | --image_size 448 \ 34 | --crop_anchors 'grid_9' \ 35 | --add_global_img True \ 36 | --add_textual_crop_indicator True \ 37 | --bf16 True \ 38 | --output_dir ./checkpoints/docowl1.5-lora \ 39 | --num_train_epochs 3 \ 40 | --per_device_train_batch_size 1 \ 41 | --per_device_eval_batch_size 1 \ 42 | --gradient_accumulation_steps 8 \ 43 | --evaluation_strategy "no" \ 44 | --save_strategy "steps" \ 45 | --save_steps 500 \ 46 | --save_total_limit 4 \ 47 | --learning_rate 1e-4 \ 48 | --weight_decay 0. 
\ 49 | --warmup_ratio 0.03 \ 50 | --lr_scheduler_type "cosine" \ 51 | --logging_steps 1 \ 52 | --tf32 True \ 53 | --model_max_length 3600 \ 54 | --gradient_checkpointing True \ 55 | --tune_vision2text True \ 56 | --freeze_vision_model True \ 57 | --freeze_backbone True \ 58 | --dataloader_num_workers 4 \ 59 | --lazy_preprocess True \ 60 | --report_to tensorboard -------------------------------------------------------------------------------- /DocOwl1.5/scripts/zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "train_micro_batch_size_per_gpu": "auto", 14 | "train_batch_size": "auto", 15 | "gradient_accumulation_steps": "auto", 16 | "zero_optimization": { 17 | "stage": 2, 18 | "overlap_comm": true, 19 | "contiguous_gradients": true, 20 | "sub_group_size": 1e9, 21 | "reduce_bucket_size": "auto" 22 | } 23 | } -------------------------------------------------------------------------------- /DocOwl1.5/scripts/zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "train_micro_batch_size_per_gpu": "auto", 14 | "train_batch_size": "auto", 15 | "gradient_accumulation_steps": "auto", 16 | "zero_optimization": { 17 | "stage": 3, 18 | "overlap_comm": true, 19 | "contiguous_gradients": true, 20 | "sub_group_size": 1e9, 21 | "reduce_bucket_size": "auto", 22 | "stage3_param_persistence_threshold": "auto", 23 | "stage3_max_live_parameters": 1e9, 24 | "stage3_max_reuse_distance": 1e9, 25 | "stage3_prefetch_bucket_size": "auto", 26 | "stage3_gather_16bit_weights_on_model_save": true 27 | } 28 | } -------------------------------------------------------------------------------- /DocOwl1.5/scripts/zero3_offload.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "optimizer": { 14 | "type": "AdamW", 15 | "params": { 16 | "lr": "auto", 17 | "betas": "auto", 18 | "eps": "auto", 19 | "weight_decay": "auto" 20 | } 21 | }, 22 | "scheduler": { 23 | "type": "WarmupLR", 24 | "params": { 25 | "warmup_min_lr": "auto", 26 | "warmup_max_lr": "auto", 27 | "warmup_num_steps": "auto" 28 | } 29 | }, 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "gather_16bit_weights_on_model_save": true 49 | }, 50 | "gradient_accumulation_steps": "auto", 51 | "gradient_clipping": "auto", 52 | "train_batch_size": "auto", 53 | "train_micro_batch_size_per_gpu": "auto", 54 | "steps_per_print": 1e5, 55 | 
"wall_clock_breakdown": false 56 | } -------------------------------------------------------------------------------- /DocOwl2/assets/Paper-Arxiv-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: ArxivPaperArxiv -------------------------------------------------------------------------------- /DocOwl2/assets/docowl2_effiency_and_case.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl2/assets/docowl2_effiency_and_case.jpg -------------------------------------------------------------------------------- /DocOwl2/assets/modelscope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl2/assets/modelscope.png -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from .__main__ import cli_main 2 | from .due_evaluator import DueEvaluator 3 | 4 | __all__ = ['DueEvaluator', 'cli_main'] 5 | -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/__version__.py: -------------------------------------------------------------------------------- 1 | """Version specification.""" 2 | 3 | VERSION = (0, 0, 8) 4 | __version__ = '.'.join(map(str, VERSION)) 5 | -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/DocOwl2/evaluation/due_evaluator/py.typed -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/scorers/__init__.py: -------------------------------------------------------------------------------- 1 | from .anls_scorer import AnlsScorer 2 | from .base_scorer import BaseScorer 3 | from .fscorer import FScorer 4 | from .mean_fscorer import MeanFScorer 5 | from .wtq_scorer import WtqScorer 6 | from .group_anls import GroupAnlsScorer 7 | from .geval_scorer import GevalScorer 8 | 9 | __all__ = ['AnlsScorer', 'BaseScorer', 'FScorer', 'MeanFScorer', 'WtqScorer', 'GevalScorer', 'GroupAnlsScorer'] 10 | -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/scorers/accuracy_scorer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | from operator import itemgetter 4 | 5 | from .base_scorer import BaseScorer 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class AccuracyScorer(BaseScorer): 11 | """Accuracy Scorer.""" 12 | 13 | def __init__(self, threshold: float = 0.5): 14 | self.__scores: List[float] = [] 15 | self.threshold = threshold 16 | 17 | @property 18 | def scores(self): 19 | return self.__scores 20 | 21 | def check_denotation(self, out: list, ref: list) -> bool: 22 | return out == ref 23 | 24 | def add(self, out_items: List[dict], ref_items: List[dict]): 25 | """Add more items for computing corpus level scores. 
26 | 27 | Args: 28 | out_items: outs from a single document (line) 29 | ref_items: reference of the evaluated document (line) 30 | 31 | """ 32 | out_ann = sorted(out_items['annotations'], key=itemgetter('key')) 33 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key')) 34 | assert [a['key'] for a in out_ann] == [a['key'] for a in ref_ann] 35 | 36 | for out, ref in zip(out_ann, ref_ann): 37 | o_values = [v['value'] for v in out['values']] 38 | r_values = [v['value'] for v in ref['values']] 39 | score = int(self.check_denotation(o_values, r_values)) 40 | self.__scores.append(score) 41 | 42 | def score(self) -> float: 43 | if self.__scores: 44 | return sum(self.__scores) / len(self.__scores) 45 | return 0.0 46 | 47 | @classmethod 48 | def support_feature_scores(cls) -> bool: 49 | return False 50 | 51 | @classmethod 52 | def metric_name(cls) -> str: 53 | return "Accuracy" 54 | -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/scorers/anls_scorer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | from operator import itemgetter 4 | 5 | import textdistance 6 | 7 | from due_evaluator.scorers.base_scorer import BaseScorer 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class AnlsScorer(BaseScorer): 13 | """ANSL Scorer.""" 14 | 15 | def __init__(self, threshold: float = 0.5): 16 | self.__scores: List[float] = [] 17 | self.threshold = threshold 18 | 19 | @property 20 | def scores(self): 21 | return self.__scores 22 | 23 | def add(self, out_items: List[dict], ref_items: List[dict]): 24 | """Add more items for computing corpus level scores. 25 | 26 | Args: 27 | out_items: outs from a single document (line) 28 | ref_items: reference of the evaluated document (line) 29 | 30 | """ 31 | out_ann = sorted(out_items['annotations'], key=itemgetter('key')) 32 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key')) 33 | assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann] 34 | 35 | """try: 36 | # assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann] 37 | out_keys = [a['key'][:100] for a in out_ann] 38 | ref_keys = [a['key'][:100] for a in ref_ann] 39 | # assert out_keys == ref_keys 40 | for i in range(len(out_keys)): 41 | try: 42 | assert out_keys[i] == ref_keys[i] 43 | except AssertionError as e: 44 | print(out_keys[i]) 45 | print(ref_keys[i]) 46 | print('==============') 47 | # exit(0) 48 | 49 | except AssertionError as e: 50 | print('key of pred and gt unmatched:') 51 | # print('pred:', out_keys) 52 | # print('gt:', ref_keys) 53 | exit(0)""" 54 | 55 | for out, ref in zip(out_ann, ref_ann): 56 | assert len(out['values']) == 1 57 | val = out['values'][0]['value'] 58 | possible_vals = ref['values'][0]['value_variants'] 59 | best_score = max([textdistance.levenshtein.normalized_similarity(val, pos) 60 | for pos in possible_vals]) 61 | if 1 - self.threshold >= best_score: 62 | best_score = 0.0 63 | self.__scores.append(best_score) 64 | 65 | def score(self) -> float: 66 | if self.__scores: 67 | return sum(self.__scores) / len(self.__scores) 68 | return 0.0 69 | 70 | @classmethod 71 | def support_feature_scores(cls) -> bool: 72 | return False 73 | 74 | @classmethod 75 | def metric_name(cls) -> str: 76 | return "ANLS" 77 | -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/scorers/base_scorer.py: 
-------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | 5 | class BaseScorer(abc.ABC): 6 | """Abstract class for scorers.""" 7 | 8 | @abc.abstractmethod 9 | def add(self, out_items: List[dict], ref_items: List[dict]): 10 | pass 11 | 12 | @abc.abstractmethod 13 | def score(self): 14 | pass 15 | 16 | @abc.abstractclassmethod 17 | def support_feature_scores(cls) -> bool: 18 | pass 19 | 20 | @abc.abstractclassmethod 21 | def metric_name(cls) -> str: 22 | pass 23 | -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/scorers/geval_scorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import tempfile 3 | from collections import defaultdict 4 | import os 5 | 6 | from due_evaluator.scorers.fscorer import FScorer 7 | from due_evaluator.scorers.base_scorer import BaseScorer 8 | 9 | 10 | GEVAL_BINARY = os.getenv('GEVAL_BINARY', '/data/shared/bin/geval') 11 | GEVAL_METRIC = os.getenv('GEVAL_METRIC', 'MultiLabel-F1:cN') 12 | 13 | 14 | class GevalScorer(BaseScorer): 15 | def __init__(self): 16 | self.__ref = tempfile.NamedTemporaryFile('w+t') 17 | self.__out = tempfile.NamedTemporaryFile('w+t') 18 | self.__ref_data = defaultdict(set) 19 | self.__out_data = defaultdict(set) 20 | 21 | @staticmethod 22 | def add_to_geval_data(data, line): 23 | name = line['name'] 24 | for annotation in line['annotations']: 25 | for idx, val in enumerate(annotation['values'], 1): 26 | for child in val['children']: 27 | new_name = child['key'] + '__' + str(idx) if '__' in child['key'] else child['key'] 28 | if child['values'] and child['values'] != ['']: 29 | new_value = '|'.join([v['value'].replace(' ', '_') for v in child['values']]) 30 | data[name].add(f'{new_name}={new_value}') 31 | 32 | def save_geval_files(self): 33 | for name in sorted(self.__ref_data.keys()): 34 | self.__ref.write(' '.join(self.__ref_data[name]) + '\n') 35 | self.__out.write(' '.join(self.__out_data[name]) + '\n') 36 | 37 | def add(self, out_items: List[str], ref_items: List[str]): 38 | self.add_to_geval_data(self.__out_data, out_items) 39 | self.add_to_geval_data(self.__ref_data, ref_items) 40 | 41 | def support_feature_scores(cls) -> bool: 42 | return False 43 | 44 | def metric_name(cls) -> str: 45 | return "GEVAL" 46 | 47 | def run_geval(self): 48 | self.__ref.flush() 49 | self.__out.flush() 50 | try: 51 | return float(os.popen(f'{GEVAL_BINARY} -o {self.__out.name} -e {self.__ref.name} --metric {GEVAL_METRIC}').read()) 52 | except: 53 | return -1 54 | 55 | def score(self) -> float: 56 | self.save_geval_files() 57 | return self.run_geval() 58 | -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/scorers/mean_fscorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from due_evaluator.scorers.fscorer import FScorer 4 | from due_evaluator.scorers.base_scorer import BaseScorer 5 | 6 | 7 | class MeanFScorer(BaseScorer): 8 | def __init__(self): 9 | self.__scores: List[float] = [] 10 | 11 | def add(self, out_items: List[str], ref_items: List[str]): 12 | fscorer = FScorer() 13 | fscorer.add(out_items, ref_items) 14 | self.__scores.append(fscorer.f_score()) 15 | 16 | def support_feature_scores(cls) -> bool: 17 | return False 18 | 19 | def metric_name(cls) -> str: 20 | return "MEAN-F1" 21 | 22 | def score(self) -> float: 
23 | if self.__scores: 24 | return sum(self.__scores) / len(self.__scores) 25 | return 0.0 26 | -------------------------------------------------------------------------------- /DocOwl2/evaluation/due_evaluator/utils.py: -------------------------------------------------------------------------------- 1 | from due_evaluator.scorers.fscorer import FScorer 2 | from typing import Dict, List, Optional, Sequence, Union 3 | 4 | import pandas as pd 5 | 6 | from due_evaluator.due_evaluator import DueEvaluator 7 | 8 | 9 | def dataframe_to_print(df: pd.DataFrame, print_format: Optional[str] = 'text') -> str: 10 | """Export dataframe to json or plain text. 11 | 12 | Args: 13 | df (pd.DataFrame): data 14 | print_format (str, optional): Print format. Defaults to 'text'. 15 | 16 | Raises: 17 | ValueError: unknown print_format 18 | 19 | Returns: 20 | str: printed version of dataframe 21 | 22 | """ 23 | out: str 24 | if print_format == 'latex': 25 | out = df.reset_index().to_latex(index=False) 26 | elif print_format == 'text': 27 | out = df.reset_index().to_string(index=False) 28 | elif print_format == 'json': 29 | out = df.to_json(orient='index') 30 | else: 31 | raise ValueError() 32 | return out 33 | 34 | 35 | def property_scores_to_string( 36 | dues: List[DueEvaluator], print_format: str = 'text', columns: Sequence[str] = ('Precision', 'Recall', 'F-1'), 37 | ) -> str: 38 | """Print out scores per property. 39 | 40 | Args: 41 | dues: List of DueEvaluators 42 | print_format: output format: text or latex 43 | columns: a list of metrics to print 44 | 45 | Returns: 46 | str: string table with feature scores. 47 | 48 | """ 49 | data = [] 50 | for property_name in sorted(dues[0].property_scorers.keys()) + ['ALL']: 51 | row_data: Dict[str, Union[str, float]] = {} 52 | row_data['Label'] = property_name 53 | for due in dues: 54 | if len(dues) == 1: 55 | suffix = '' 56 | else: 57 | suffix = f' ({due.path})' 58 | if property_name == 'ALL': 59 | scorer = due.general_scorer 60 | else: 61 | scorer = due.property_scorers[property_name] 62 | 63 | row_data[scorer.metric_name() + suffix] = scorer.score() 64 | if isinstance(scorer, FScorer): 65 | if 'Precision' in columns: 66 | row_data['Precision' + suffix] = scorer.precision() 67 | if 'Recall' in columns: 68 | row_data['Recall' + suffix] = scorer.recall() 69 | data.append(row_data) 70 | 71 | df = pd.DataFrame(data) 72 | df.set_index('Label', drop=True, inplace=True) 73 | 74 | return dataframe_to_print(df, print_format) 75 | -------------------------------------------------------------------------------- /PaperOwl/.gitattributes: -------------------------------------------------------------------------------- 1 | *.py eol=lf 2 | *.rst eol=lf 3 | *.md eol=lf 4 | *.mdx eol=lf -------------------------------------------------------------------------------- /PaperOwl/.gitignore: -------------------------------------------------------------------------------- 1 | evaluate_results* 2 | checkpoints/ 3 | benchmark_files/ 4 | ureader_images 5 | ureader_json 6 | ureader_images/ 7 | ureader_json/ 8 | # Initially taken from Github's Python gitignore file 9 | tensorboard/* 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | .ossutil_* 15 | # C extensions 16 | *.so 17 | 18 | # tests and logs 19 | tests/fixtures/cached_*_text.txt 20 | logs/ 21 | lightning_logs/ 22 | lang_code_data/ 23 | 24 | # Distribution / packaging 25 | .Python 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | 
parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | MANIFEST 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | .python-version 97 | 98 | # celery beat schedule file 99 | celerybeat-schedule 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # vscode 132 | .vs 133 | .vscode 134 | 135 | # Pycharm 136 | .idea 137 | 138 | # TF code 139 | tensorflow_code 140 | 141 | # Models 142 | proc_data 143 | 144 | # examples 145 | runs 146 | /runs_old 147 | /wandb 148 | /output 149 | /configs_dev 150 | /scripts_dev 151 | # /examples/runs 152 | # /examples/**/*.args 153 | # /examples/rag/sweep 154 | 155 | # data 156 | /data 157 | serialization_dir 158 | 159 | # emacs 160 | *.*~ 161 | debug.env 162 | 163 | # vim 164 | .*.swp 165 | 166 | #ctags 167 | tags 168 | 169 | # pre-commit 170 | .pre-commit* 171 | 172 | # .lock 173 | *.lock 174 | 175 | # DS_Store (MacOS) 176 | .DS_Store 177 | 178 | # ruff 179 | .ruff_cache 180 | -------------------------------------------------------------------------------- /PaperOwl/assets/Paper-Arxiv-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: ArxivPaperArxiv -------------------------------------------------------------------------------- /PaperOwl/assets/Paper-PDF-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: PDFPaperPDF -------------------------------------------------------------------------------- /PaperOwl/assets/data_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/assets/data_process.png -------------------------------------------------------------------------------- /PaperOwl/assets/diagram_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/assets/diagram_distribution.png -------------------------------------------------------------------------------- /PaperOwl/assets/intro_case.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/assets/intro_case.jpeg -------------------------------------------------------------------------------- /PaperOwl/assets/paper_category.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/assets/paper_category.png -------------------------------------------------------------------------------- /PaperOwl/configs/sft/release.yaml: -------------------------------------------------------------------------------- 1 | train_processors: { 2 | sft: {type: 'DocNewMultiScaleSFTProcessor', image_size: 224, 3 | anchors: [[2, 2]]} 4 | } 5 | 6 | valid_processors: { 7 | sft: {type: 'DocNewMultiScaleSFTProcessor', image_size: 224, 8 | anchors: [[2, 2]]} 9 | } 10 | 11 | data_files: [ 12 | 'M-Paper/sft/3tasks_train.jsonl', 13 | 'M-Paper/sft/3tasks_val.jsonl' 14 | ] 15 | 16 | patch_pos_embed_type: post 17 | 18 | -------------------------------------------------------------------------------- /PaperOwl/ds_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "zero_optimization": { 14 | "stage": 1 15 | }, 16 | "train_batch_size": "auto", 17 | "train_micro_batch_size_per_gpu": "auto" 18 | } -------------------------------------------------------------------------------- /PaperOwl/mplug_owl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from typing import TYPE_CHECKING 15 | 16 | from transformers.utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available 17 | 18 | 19 | _import_structure = { 20 | "configuration_mplug_owl": ["MPLUG_OWL_PRETRAINED_CONFIG_ARCHIVE_MAP", "MplugOwlConfig"], 21 | "processing_mplug_owl": ["MplugOwlImageProcessor", "MplugOwlProcessor"], 22 | "tokenization_mplug_owl": ["MplugOwlTokenizer"], 23 | } 24 | 25 | try: 26 | if not is_tokenizers_available(): 27 | raise OptionalDependencyNotAvailable() 28 | except OptionalDependencyNotAvailable: 29 | pass 30 | 31 | 32 | try: 33 | if not is_torch_available(): 34 | raise OptionalDependencyNotAvailable() 35 | except OptionalDependencyNotAvailable: 36 | pass 37 | else: 38 | _import_structure["modeling_mplug_owl"] = [ 39 | "MPLUG_OWL_PRETRAINED_MODEL_ARCHIVE_LIST", 40 | "MplugOwlForConditionalGeneration", 41 | "MplugOwlModel", 42 | ] 43 | 44 | 45 | if TYPE_CHECKING: 46 | from .configuration_mplug_owl import MPLUG_OWL_PRETRAINED_CONFIG_ARCHIVE_MAP, MplugOwlConfig 47 | from .tokenization_mplug_owl import MplugOwlTokenizer 48 | 49 | try: 50 | if not is_tokenizers_available(): 51 | raise OptionalDependencyNotAvailable() 52 | except OptionalDependencyNotAvailable: 53 | pass 54 | 55 | try: 56 | if not is_torch_available(): 57 | raise OptionalDependencyNotAvailable() 58 | except OptionalDependencyNotAvailable: 59 | pass 60 | else: 61 | from .modeling_mplug_owl import ( 62 | MPLUG_OWL_PRETRAINED_MODEL_ARCHIVE_LIST, 63 | MplugOwlForConditionalGeneration, 64 | MplugOwlModel, 65 | MplugOwlPreTrainedModel, 66 | ) 67 | 68 | 69 | else: 70 | import sys 71 | 72 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 73 | 74 | from .configuration_mplug_owl import * 75 | from .modeling_mplug_owl import * 76 | from .processing_mplug_owl import * 77 | from .tokenization_mplug_owl import * 78 | -------------------------------------------------------------------------------- /PaperOwl/mplug_owl/tokenization_mplug_owl.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 x-plug and The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Tokenization classes for MplugOwl.""" 16 | 17 | from transformers.utils import logging 18 | from transformers.models.llama.tokenization_llama import LlamaTokenizer 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 24 | 25 | PRETRAINED_VOCAB_FILES_MAP = { 26 | "vocab_file": { 27 | "MAGAer13/mplug-owl-llama-7b": "https://huggingface.co/MAGAer13/mplug-owl-llama-7b/resolve/main/vocab.txt", 28 | }, 29 | } 30 | 31 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 32 | "MAGAer13/mplug-owl-llama-7b": 1024, 33 | } 34 | 35 | 36 | class MplugOwlTokenizer(LlamaTokenizer): 37 | def __init__( 38 | self, 39 | vocab_file, 40 | unk_token="<unk>", 41 | bos_token="<s>", 42 | eos_token="</s>", 43 | pad_token="<unk>", 44 | sp_model_kwargs=None, 45 | add_bos_token=False, 46 | add_eos_token=False, 47 | clean_up_tokenization_spaces=False, 48 | **kwargs, 49 | ): 50 | super().__init__( 51 | vocab_file, 52 | unk_token, 53 | bos_token, 54 | eos_token, 55 | pad_token, 56 | sp_model_kwargs, 57 | add_bos_token, 58 | add_eos_token, 59 | clean_up_tokenization_spaces, 60 | **kwargs, 61 | ) 62 | self.eod_id = self.eos_token_id 63 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/pipeline/__init__.py -------------------------------------------------------------------------------- /PaperOwl/pipeline/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .processors.builder import build_processors 2 | from .xgpt3_dataset import MultiModalDataset 3 | 4 | def train_valid_test_datasets_provider(data_path, config, tokenizer, seq_length=1024,image_root='ureader_images'): 5 | """Build train and valid datasets.""" 6 | print('> building train and validation datasets for mPLUG-Owl ...') 7 | train_ds, valid_ds = build_train_valid_test_datasets( 8 | input_file=data_path, 9 | tokenizer=tokenizer, 10 | max_length=seq_length, 11 | config=config, 12 | image_root=image_root) 13 | print("> finished creating mPLUG-Owl datasets ...") 14 | 15 | return train_ds, valid_ds 16 | 17 | def build_train_valid_test_datasets(input_file, tokenizer, max_length=80, config=None,image_root='ureader_images'): 18 | train_processors = build_processors(config['train_processors']) 19 | valid_processors = build_processors(config['valid_processors']) 20 | if isinstance(input_file, dict): 21 | train_ds = MultiModalDataset(input_file['train'][0], tokenizer, train_processors, max_length, image_root=image_root) 22 | valid_ds = {name: MultiModalDataset(ds, tokenizer, valid_processors, max_length) for name,ds in input_file['valid'].items()} 23 | test_ds = None 24 | 25 | else: 26 | assert len(input_file) == 2 # If you have more than 2 files, modify the code here or merge them into train and dev 27 | train_ds = MultiModalDataset(input_file[0], tokenizer, train_processors, max_length) 28 | valid_ds = MultiModalDataset(input_file[1], tokenizer, valid_processors, max_length) 29 | test_ds = None 30 | return (train_ds, valid_ds) 31 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/data_utils/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Alibaba. All rights reserved. 
2 | from .builder import PROCESSORS, build_processors 3 | from .default_processor import DefaultProcessor 4 | from .caption_processor import CaptionProcessor 5 | from .doc_processor import DocPretrainProcessor, DocSFTProcessor 6 | __all__ = [ 7 | 'PROCESSORS', 'build_processors', 8 | 'DefaultProcessor', 'CaptionProcessor', 9 | 'DocPretrainProcessor', 'DocSFTProcessor' 10 | ] -------------------------------------------------------------------------------- /PaperOwl/pipeline/data_utils/processors/builder.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from icecream import ic 4 | from pipeline.data_utils.registry import Registry, build_from_cfg 5 | # from .data_utils.registry import Registry, build_from_cfg 6 | 7 | PROCESSORS = Registry('processors') 8 | 9 | def build_processors(processors_cfg): 10 | processors = dict() 11 | for task, processor in processors_cfg.items(): 12 | processors[task] = build_from_cfg(processor, PROCESSORS) 13 | ic(type(processors[task])) 14 | return processors 15 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/data_utils/processors/caption_processor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | from PIL import Image 4 | import random 5 | 6 | from pipeline.data_utils.randaugment import RandomAugment 7 | from .builder import PROCESSORS 8 | 9 | 10 | @PROCESSORS.register_module() 11 | class CaptionProcessor: 12 | def __init__(self, image_size=224, min_scale = 0.5, randaug=False): 13 | self.image_size = image_size 14 | self.min_scale = min_scale 15 | 16 | if randaug: 17 | self.image_transform = transforms.Compose([ 18 | transforms.RandomResizedCrop(image_size,scale=(min_scale, 1.0), interpolation=Image.BICUBIC), 19 | transforms.RandomHorizontalFlip(), 20 | RandomAugment(2,7,isPIL=True,augs=['Identity','AutoContrast','Equalize','Brightness','Sharpness', 21 | 'ShearX', 'ShearY', 'TranslateX', 'TranslateY', 'Rotate']), 22 | transforms.ToTensor(), 23 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 24 | ]) 25 | else: 26 | self.image_transform = transforms.Compose([ 27 | transforms.RandomResizedCrop(image_size,scale=(min_scale, 1.0), interpolation=Image.BICUBIC), 28 | transforms.RandomHorizontalFlip(), 29 | transforms.ToTensor(), 30 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 31 | ]) 32 | self.text_transform = None 33 | 34 | def __call__(self, image, text): 35 | assert image or text 36 | 37 | if image: 38 | image_input = self.image_transform(image) 39 | else: 40 | image_input = None 41 | 42 | if text: 43 | if isinstance(text["prompt"], list): 44 | prompt = random.choice(text["prompt"]) 45 | else: 46 | prompt = text["prompt"] 47 | text_input = dict( 48 | prompt=prompt, 49 | completion=text["text"], 50 | ) 51 | else: 52 | text_input = None 53 | return image_input, text_input -------------------------------------------------------------------------------- /PaperOwl/pipeline/data_utils/processors/default_processor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | from PIL import Image 4 | import random 5 | 6 | from pipeline.data_utils.randaugment import RandomAugment 7 | from .builder import PROCESSORS 8 | 9 | 10 | @PROCESSORS.register_module() 11 | class 
DefaultProcessor: 12 | def __init__(self, image_size=224): 13 | self.image_size = image_size 14 | 15 | self.image_transform = transforms.Compose([ 16 | transforms.Resize((image_size, image_size),interpolation=Image.BICUBIC), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 19 | ]) 20 | 21 | self.text_transform = None 22 | 23 | def __call__(self, image, text): 24 | assert image or text 25 | 26 | if image: 27 | image_input = self.image_transform(image) 28 | else: 29 | image_input = None 30 | 31 | if text: 32 | if isinstance(text["prompt"], list): 33 | prompt = random.choice(text["prompt"]) 34 | else: 35 | prompt = text["prompt"] 36 | text_input = dict( 37 | prompt=prompt, 38 | completion=text["text"], 39 | ) 40 | else: 41 | text_input = None 42 | return image_input, text_input -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/pipeline/eval_utils/__init__.py -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from .__main__ import cli_main 2 | from .due_evaluator import DueEvaluator 3 | 4 | __all__ = ['DueEvaluator', 'cli_main'] 5 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import sys 6 | from typing import Optional, Set 7 | import json 8 | 9 | from .due_evaluator import DueEvaluator 10 | from .utils import property_scores_to_string 11 | 12 | 13 | def parse_args(): 14 | """Parse CLI arguments. 15 | 16 | Returns: 17 | namespace: namespace with parsed variables. 
18 | 19 | """ 20 | parser = argparse.ArgumentParser('Document Understanding Evaluator') 21 | parser.add_argument( 22 | '--out-files', 23 | '-o', 24 | type=argparse.FileType('r', encoding='utf-8'), 25 | required=True, 26 | nargs='+', 27 | help='Out file to evaluate', 28 | ) 29 | parser.add_argument( 30 | '--reference', '-r', type=argparse.FileType('r', encoding='utf-8'), required=True, help='Reference file', 31 | ) 32 | parser.add_argument('--metric', '-m', type=str, default='F1', choices=['F1', 'MEAN-F1', 'ANLS', 'WTQ', 'GROUP-ANLS']) 33 | parser.add_argument( 34 | '--return-score', 35 | default='F1', 36 | choices=['F1', 'mean-F1', 'ANLS', 'mean-Precision', 'mean-Recall', 'WTQ'], 37 | help='Return WR-like mean-F1 score', 38 | ) 39 | parser.add_argument('--line-by-line', action='store_true', default=False, help='Return retults example-based') 40 | parser.add_argument( 41 | '--columns', type=str, nargs='+', default=['Precision', 'Recall', 'F1'], help='Columns', 42 | ) 43 | parser.add_argument( 44 | '--print-format', 45 | default='text', 46 | type=str, 47 | choices=['text', 'latex', 'json'], 48 | help='Print feature table in the given format', 49 | ) 50 | parser.add_argument('--properties', nargs='+', type=str, help='Property set to be limitted to') 51 | parser.add_argument( 52 | '--ignore-case', '-i', action='store_true', default=False, help='Property set to be limitted to', 53 | ) 54 | return parser.parse_args() 55 | 56 | 57 | def cli_main(args: argparse.Namespace): 58 | """CLI main. 59 | 60 | Args: 61 | args: cli arguments 62 | """ 63 | reference = [json.loads(line) for line in args.reference] 64 | 65 | evaluators = [] 66 | for out_file in args.out_files: 67 | predictions = [json.loads(line) for line in out_file] 68 | 69 | property_set: Optional[Set[str]] 70 | if args.properties: 71 | property_set = args.properties 72 | else: 73 | property_set = None 74 | 75 | evaluators.append( 76 | DueEvaluator(reference, predictions, property_set, args.ignore_case, out_file.name, args.metric) 77 | ) 78 | 79 | prop_str = property_scores_to_string(evaluators, args.print_format, args.columns) 80 | if args.print_format != 'json': 81 | print(prop_str, file=sys.stderr) 82 | 83 | if args.line_by_line: 84 | for idx, score in enumerate(evaluators[0].line_by_line()): 85 | print(f'{idx}: {score}', file=sys.stderr) 86 | return prop_str 87 | 88 | 89 | def main() -> None: 90 | """Main.""" 91 | args = parse_args() 92 | cli_main(args) 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/__version__.py: -------------------------------------------------------------------------------- 1 | """Version specification.""" 2 | 3 | VERSION = (0, 0, 8) 4 | __version__ = '.'.join(map(str, VERSION)) 5 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/pipeline/eval_utils/due_evaluator/py.typed -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/scorers/__init__.py: -------------------------------------------------------------------------------- 1 | from .anls_scorer import AnlsScorer 2 | from .base_scorer import BaseScorer 3 | from .fscorer import FScorer 4 | from 
.mean_fscorer import MeanFScorer 5 | from .wtq_scorer import WtqScorer 6 | from .group_anls import GroupAnlsScorer 7 | from .geval_scorer import GevalScorer 8 | 9 | __all__ = ['AnlsScorer', 'BaseScorer', 'FScorer', 'MeanFScorer', 'WtqScorer', 'GevalScorer', 'GroupAnlsScorer'] 10 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/scorers/accuracy_scorer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | from operator import itemgetter 4 | 5 | from .base_scorer import BaseScorer 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class AccuracyScorer(BaseScorer): 11 | """Accuracy Scorer.""" 12 | 13 | def __init__(self, threshold: float = 0.5): 14 | self.__scores: List[float] = [] 15 | self.threshold = threshold 16 | 17 | @property 18 | def scores(self): 19 | return self.__scores 20 | 21 | def check_denotation(self, out: list, ref: list) -> bool: 22 | return out == ref 23 | 24 | def add(self, out_items: List[dict], ref_items: List[dict]): 25 | """Add more items for computing corpus level scores. 26 | 27 | Args: 28 | out_items: outs from a single document (line) 29 | ref_items: reference of the evaluated document (line) 30 | 31 | """ 32 | out_ann = sorted(out_items['annotations'], key=itemgetter('key')) 33 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key')) 34 | assert [a['key'] for a in out_ann] == [a['key'] for a in ref_ann] 35 | 36 | for out, ref in zip(out_ann, ref_ann): 37 | o_values = [v['value'] for v in out['values']] 38 | r_values = [v['value'] for v in ref['values']] 39 | score = int(self.check_denotation(o_values, r_values)) 40 | self.__scores.append(score) 41 | 42 | def score(self) -> float: 43 | if self.__scores: 44 | return sum(self.__scores) / len(self.__scores) 45 | return 0.0 46 | 47 | @classmethod 48 | def support_feature_scores(cls) -> bool: 49 | return False 50 | 51 | @classmethod 52 | def metric_name(cls) -> str: 53 | return "Accuracy" 54 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/scorers/anls_scorer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | from operator import itemgetter 4 | 5 | import textdistance 6 | 7 | from .base_scorer import BaseScorer 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class AnlsScorer(BaseScorer): 13 | """ANSL Scorer.""" 14 | 15 | def __init__(self, threshold: float = 0.5): 16 | self.__scores: List[float] = [] 17 | self.threshold = threshold 18 | 19 | @property 20 | def scores(self): 21 | return self.__scores 22 | 23 | def add(self, out_items: List[dict], ref_items: List[dict]): 24 | """Add more items for computing corpus level scores. 
25 | 26 | Args: 27 | out_items: outs from a single document (line) 28 | ref_items: reference of the evaluated document (line) 29 | 30 | """ 31 | out_ann = sorted(out_items['annotations'], key=itemgetter('key')) 32 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key')) 33 | assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann] 34 | 35 | """try: 36 | # assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann] 37 | out_keys = [a['key'][:100] for a in out_ann] 38 | ref_keys = [a['key'][:100] for a in ref_ann] 39 | # assert out_keys == ref_keys 40 | for i in range(len(out_keys)): 41 | try: 42 | assert out_keys[i] == ref_keys[i] 43 | except AssertionError as e: 44 | print(out_keys[i]) 45 | print(ref_keys[i]) 46 | print('==============') 47 | # exit(0) 48 | 49 | except AssertionError as e: 50 | print('key of pred and gt unmatched:') 51 | # print('pred:', out_keys) 52 | # print('gt:', ref_keys) 53 | exit(0)""" 54 | 55 | for out, ref in zip(out_ann, ref_ann): 56 | assert len(out['values']) == 1 57 | val = out['values'][0]['value'] 58 | possible_vals = ref['values'][0]['value_variants'] 59 | best_score = max([textdistance.levenshtein.normalized_similarity(val, pos) 60 | for pos in possible_vals]) 61 | if 1 - self.threshold >= best_score: 62 | best_score = 0.0 63 | self.__scores.append(best_score) 64 | 65 | def score(self) -> float: 66 | if self.__scores: 67 | return sum(self.__scores) / len(self.__scores) 68 | return 0.0 69 | 70 | @classmethod 71 | def support_feature_scores(cls) -> bool: 72 | return False 73 | 74 | @classmethod 75 | def metric_name(cls) -> str: 76 | return "ANLS" 77 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/scorers/base_scorer.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | 5 | class BaseScorer(abc.ABC): 6 | """Abstract class for scorers.""" 7 | 8 | @abc.abstractmethod 9 | def add(self, out_items: List[dict], ref_items: List[dict]): 10 | pass 11 | 12 | @abc.abstractmethod 13 | def score(self): 14 | pass 15 | 16 | @abc.abstractclassmethod 17 | def support_feature_scores(cls) -> bool: 18 | pass 19 | 20 | @abc.abstractclassmethod 21 | def metric_name(cls) -> str: 22 | pass 23 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/scorers/geval_scorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import tempfile 3 | from collections import defaultdict 4 | import os 5 | 6 | from .fscorer import FScorer 7 | from .base_scorer import BaseScorer 8 | 9 | 10 | GEVAL_BINARY = os.getenv('GEVAL_BINARY', '/data/shared/bin/geval') 11 | GEVAL_METRIC = os.getenv('GEVAL_METRIC', 'MultiLabel-F1:cN') 12 | 13 | 14 | class GevalScorer(BaseScorer): 15 | def __init__(self): 16 | self.__ref = tempfile.NamedTemporaryFile('w+t') 17 | self.__out = tempfile.NamedTemporaryFile('w+t') 18 | self.__ref_data = defaultdict(set) 19 | self.__out_data = defaultdict(set) 20 | 21 | @staticmethod 22 | def add_to_geval_data(data, line): 23 | name = line['name'] 24 | for annotation in line['annotations']: 25 | for idx, val in enumerate(annotation['values'], 1): 26 | for child in val['children']: 27 | new_name = child['key'] + '__' + str(idx) if '__' in child['key'] else child['key'] 28 | if child['values'] and child['values'] != ['']: 29 | new_value = 
'|'.join([v['value'].replace(' ', '_') for v in child['values']]) 30 | data[name].add(f'{new_name}={new_value}') 31 | 32 | def save_geval_files(self): 33 | for name in sorted(self.__ref_data.keys()): 34 | self.__ref.write(' '.join(self.__ref_data[name]) + '\n') 35 | self.__out.write(' '.join(self.__out_data[name]) + '\n') 36 | 37 | def add(self, out_items: List[str], ref_items: List[str]): 38 | self.add_to_geval_data(self.__out_data, out_items) 39 | self.add_to_geval_data(self.__ref_data, ref_items) 40 | 41 | def support_feature_scores(cls) -> bool: 42 | return False 43 | 44 | def metric_name(cls) -> str: 45 | return "GEVAL" 46 | 47 | def run_geval(self): 48 | self.__ref.flush() 49 | self.__out.flush() 50 | try: 51 | return float(os.popen(f'{GEVAL_BINARY} -o {self.__out.name} -e {self.__ref.name} --metric {GEVAL_METRIC}').read()) 52 | except: 53 | return -1 54 | 55 | def score(self) -> float: 56 | self.save_geval_files() 57 | return self.run_geval() 58 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/scorers/mean_fscorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from .fscorer import FScorer 4 | from .base_scorer import BaseScorer 5 | 6 | 7 | class MeanFScorer(BaseScorer): 8 | def __init__(self): 9 | self.__scores: List[float] = [] 10 | 11 | def add(self, out_items: List[str], ref_items: List[str]): 12 | fscorer = FScorer() 13 | fscorer.add(out_items, ref_items) 14 | self.__scores.append(fscorer.f_score()) 15 | 16 | def support_feature_scores(cls) -> bool: 17 | return False 18 | 19 | def metric_name(cls) -> str: 20 | return "MEAN-F1" 21 | 22 | def score(self) -> float: 23 | if self.__scores: 24 | return sum(self.__scores) / len(self.__scores) 25 | return 0.0 26 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/due_evaluator/utils.py: -------------------------------------------------------------------------------- 1 | from .scorers.fscorer import FScorer 2 | from typing import Dict, List, Optional, Sequence, Union 3 | 4 | import pandas as pd 5 | 6 | from .due_evaluator import DueEvaluator 7 | 8 | 9 | def dataframe_to_print(df: pd.DataFrame, print_format: Optional[str] = 'text') -> str: 10 | """Export dataframe to json or plain text. 11 | 12 | Args: 13 | df (pd.DataFrame): data 14 | print_format (str, optional): Print format. Defaults to 'text'. 15 | 16 | Raises: 17 | ValueError: unknown print_format 18 | 19 | Returns: 20 | str: printed version of dataframe 21 | 22 | """ 23 | out: str 24 | if print_format == 'latex': 25 | out = df.reset_index().to_latex(index=False) 26 | elif print_format == 'text': 27 | out = df.reset_index().to_string(index=False) 28 | elif print_format == 'json': 29 | out = df.to_json(orient='index') 30 | else: 31 | raise ValueError() 32 | return out 33 | 34 | 35 | def property_scores_to_string( 36 | dues: List[DueEvaluator], print_format: str = 'text', columns: Sequence[str] = ('Precision', 'Recall', 'F-1'), 37 | ) -> str: 38 | """Print out scores per property. 39 | 40 | Args: 41 | dues: List of DueEvaluators 42 | print_format: output format: text or latex 43 | columns: a list of metrics to print 44 | 45 | Returns: 46 | str: string table with feature scores. 
47 | 48 | """ 49 | data = [] 50 | for property_name in sorted(dues[0].property_scorers.keys()) + ['ALL']: 51 | row_data: Dict[str, Union[str, float]] = {} 52 | row_data['Label'] = property_name 53 | for due in dues: 54 | if len(dues) == 1: 55 | suffix = '' 56 | else: 57 | suffix = f' ({due.path})' 58 | if property_name == 'ALL': 59 | scorer = due.general_scorer 60 | else: 61 | scorer = due.property_scorers[property_name] 62 | 63 | row_data[scorer.metric_name() + suffix] = scorer.score() 64 | if isinstance(scorer, FScorer): 65 | if 'Precision' in columns: 66 | row_data['Precision' + suffix] = scorer.precision() 67 | if 'Recall' in columns: 68 | row_data['Recall' + suffix] = scorer.recall() 69 | data.append(row_data) 70 | 71 | df = pd.DataFrame(data) 72 | df.set_index('Label', drop=True, inplace=True) 73 | 74 | return dataframe_to_print(df, print_format) 75 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/eval_utils/run_evaluation.py: -------------------------------------------------------------------------------- 1 | from .tools import llm_answer_eval, postprocess_llm_vqa, textcaps_textvqa_eval 2 | 3 | if __name__ == '__main__': 4 | 5 | llm_answer_eval(metric_names=['RelaxedAccuracy'], result_path='evaluate_results/test_ChartQA.jsonl', save_each_eval=True) 6 | llm_answer_eval(metric_names=['ExactAccuracy'], result_path='evaluate_results/test_TabFact.jsonl', save_each_eval=True) 7 | llm_answer_eval(metric_names=['BLEU1', 'BLEU2', 'BLEU3', 'BLEU4', 'Meteor', 'RougeL', 'CIDEr'], result_path='evaluate_results/test_VisualMRC.jsonl', save_each_eval=True) 8 | 9 | 10 | postprocess_llm_vqa(dataset_name='DeepFormQA', split='test', 11 | llm_pred_path='./evaluate_results/test_DeepForm.jsonl', 12 | eval_flag=True) 13 | postprocess_llm_vqa(dataset_name='DocVQA', split='test', 14 | llm_pred_path='./evaluate_results/test_DocVQA.jsonl', 15 | eval_flag=True) 16 | postprocess_llm_vqa(dataset_name='InfographicsVQA', split='test', 17 | llm_pred_path='evaluate_results/test_InfographicsVQA.jsonl', 18 | eval_flag=True) 19 | postprocess_llm_vqa(dataset_name='KleisterCharityQA', split='test', 20 | llm_pred_path='evaluate_results/test_KleisterCharity.jsonl', 21 | eval_flag=True) 22 | postprocess_llm_vqa(dataset_name='WikiTableQuestions', split='test', 23 | llm_pred_path='evaluate_results/test_WikiTableQuestions.jsonl', 24 | eval_flag=True) 25 | 26 | # need to submit evaluate_results/***_official_eval.json 27 | textcaps_textvqa_eval(result_path='evaluate_results/test_TextCaps.jsonl', dataset='TextCaps', split='test') 28 | textcaps_textvqa_eval(result_path='evaluate_results/test_TextVQA.jsonl', dataset='TextVQA', split='test') 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/interface.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import requests 4 | from PIL import Image 5 | from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration 6 | from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer 7 | from mplug_owl.processing_mplug_owl import MplugOwlImageProcessor, MplugOwlProcessor 8 | from sconf import Config 9 | from pipeline.data_utils.processors.builder import build_processors 10 | 11 | 12 | def get_model(pretrained_ckpt, use_bf16=False): 13 | """Model Provider with tokenizer and processor. 14 | 15 | Args: 16 | pretrained_ckpt (string): The path to pre-trained checkpoint. 
17 | use_bf16 (bool, optional): Whether to use bfloat16 to load the model. Defaults to False. 18 | 19 | Returns: 20 | model: MplugOwl Model 21 | tokenizer: MplugOwl text tokenizer 22 | processor: MplugOwl processor (including text and image) 23 | """ 24 | model = MplugOwlForConditionalGeneration.from_pretrained( 25 | pretrained_ckpt, 26 | torch_dtype=torch.bfloat16 if use_bf16 else torch.half, 27 | ) 28 | config = Config('configs/sft/release.yaml') 29 | image_processor = build_processors(config['valid_processors'])['sft'] 30 | tokenizer = MplugOwlTokenizer.from_pretrained(pretrained_ckpt) 31 | processor = MplugOwlProcessor(image_processor, tokenizer) 32 | return model, tokenizer, processor 33 | 34 | 35 | def do_generate(prompts, image_list, model, tokenizer, processor, use_bf16=False, **generate_kwargs): 36 | """The interface for generation 37 | 38 | Args: 39 | prompts (List[str]): The prompt text 40 | image_list (List[str]): Paths of images 41 | model (MplugOwlForConditionalGeneration): MplugOwlForConditionalGeneration 42 | tokenizer (MplugOwlTokenizer): MplugOwlTokenizer 43 | processor (MplugOwlProcessor): MplugOwlProcessor 44 | use_bf16 (bool, optional): Whether to use bfloat16. Defaults to False. 45 | 46 | Returns: 47 | sentence (str): Generated sentence. 48 | """ 49 | if image_list: 50 | images = [Image.open(_) for _ in image_list] 51 | else: 52 | images = None 53 | inputs = processor(text=prompts, images=images, return_tensors='pt') 54 | inputs = {k: v.bfloat16() if v.dtype == torch.float else v for k, v in inputs.items()} 55 | inputs = {k: v.to(model.device) for k, v in inputs.items()} 56 | with torch.no_grad(): 57 | res = model.generate(**inputs, **generate_kwargs) 58 | sentence = tokenizer.decode(res.tolist()[0], skip_special_tokens=True) 59 | return sentence 60 | 61 | 62 | if __name__ == '__main__': 63 | pass 64 | -------------------------------------------------------------------------------- /PaperOwl/pipeline/trainer.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | import argparse 3 | from functools import partial 4 | 5 | import torch 6 | 7 | from torch.utils.data import DataLoader, Dataset 8 | from torch.utils.data.distributed import DistributedSampler 9 | 10 | from transformers import Trainer 11 | 12 | from pipeline.utils import batchify 13 | 14 | 15 | class CustomTrainer(Trainer): 16 | 17 | def get_train_dataloader(self) -> DataLoader: 18 | dataset = self.train_dataset 19 | sampler = DistributedSampler(dataset) 20 | return torch.utils.data.DataLoader( 21 | dataset, batch_size=self._train_batch_size, 22 | sampler=sampler, 23 | num_workers=self.args.dataloader_num_workers, 24 | drop_last=True, 25 | pin_memory=False, 26 | collate_fn=batchify) 27 | 28 | 29 | def get_eval_dataloader(self, eval_dataset: Dataset | None = None) -> DataLoader: 30 | dataset = self.eval_dataset 31 | sampler = DistributedSampler(dataset, shuffle=False) 32 | return torch.utils.data.DataLoader( 33 | dataset, batch_size=self._train_batch_size, 34 | sampler=sampler, 35 | num_workers=self.args.dataloader_num_workers, 36 | drop_last=True, 37 | pin_memory=False, 38 | collate_fn=batchify) -------------------------------------------------------------------------------- /PaperOwl/scripts/train_it.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # For A100 80G 3 | DIR=`pwd` 4 | export PYTHONPATH=$DIR 5 | DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` 6 | 7 | if [ $MASTER_ADDR ];then 
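    # Note (descriptive comment, not in the original script): a multi-node launcher is
    # expected to have exported MASTER_ADDR/MASTER_PORT/WORLD_SIZE/RANK already, so this
    # branch only echoes them; the else-branch below falls back to a single-node localhost
    # setup with a semi-random rendezvous port.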
8 | echo $MASTER_ADDR 9 | echo $MASTER_PORT 10 | echo $WORLD_SIZE 11 | echo $RANK 12 | else 13 | MASTER_ADDR=127.0.0.1 14 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15 15 | WORLD_SIZE=1 16 | RANK=0 17 | fi 18 | 19 | DISTRIBUTED_ARGS="--nproc_per_node 1 \ 20 | --nnodes ${WORLD_SIZE} \ 21 | --node_rank ${RANK} \ 22 | --master_addr ${MASTER_ADDR} \ 23 | --master_port ${MASTER_PORT}" 24 | 25 | EXP_NAME=paperowl 26 | 27 | max_length=2304 28 | micro_batch_size=4 29 | global_batch_size=256 30 | gradient_accumulation_steps=1 31 | 32 | SAVE_NAME=${ureader}_${max_length}_${global_batch_size} 33 | 34 | SAVE_PATH="./output/${EXP_NAME}/" 35 | TENSORBOARD_PATH="./tensorboard/sft/${SAVE_NAME}/" 36 | 37 | 38 | train_epochs=10 39 | train_iters=29000 40 | 41 | lr_warmup_iters=36 42 | 43 | eval_iter=290 44 | eval_interval=1160 45 | save_interval=1160 46 | 47 | mkdir -p ${SAVE_PATH} 48 | mkdir -p ${TENSORBOARD_PATH} 49 | 50 | options=" \ 51 | --pretrained-ckpt checkpoints/mplug-owl-llama-7b \ 52 | --seq-length ${max_length} \ 53 | --micro-batch-size ${micro_batch_size} \ 54 | --global-batch-size ${global_batch_size} \ 55 | --num-training-steps ${train_iters} \ 56 | --train-epochs ${train_epochs} \ 57 | --num-warmup-steps ${lr_warmup_iters} \ 58 | --gradient-accumulation-steps ${gradient_accumulation_steps} \ 59 | --lr 1e-4 \ 60 | --min-lr 1e-6 \ 61 | --eval-iters ${eval_iter} \ 62 | --save-interval ${save_interval} \ 63 | --save-path ${SAVE_PATH} \ 64 | --tensorboard-dir ${TENSORBOARD_PATH} \ 65 | --clip-grad 1.0 \ 66 | --weight-decay 0.0001 \ 67 | --adam-beta1 0.9 \ 68 | --adam-beta2 0.999 \ 69 | --num-workers 16 \ 70 | --use-lora \ 71 | --gradient-checkpointing \ 72 | --bf16" 73 | 74 | multimodal_options=" \ 75 | --mm-config configs/sft/release.yaml 76 | " 77 | 78 | python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log -------------------------------------------------------------------------------- /PaperOwl/scripts/train_it_v100.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # For V100 32G 3 | DIR=`pwd` 4 | export PYTHONPATH=$DIR 5 | DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` 6 | 7 | if [ $MASTER_ADDR ];then 8 | echo $MASTER_ADDR 9 | echo $MASTER_PORT 10 | echo $WORLD_SIZE 11 | echo $RANK 12 | else 13 | MASTER_ADDR=127.0.0.1 14 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15 15 | WORLD_SIZE=1 16 | RANK=0 17 | fi 18 | 19 | DISTRIBUTED_ARGS="--nproc_per_node 1 \ 20 | --nnodes ${WORLD_SIZE} \ 21 | --node_rank ${RANK} \ 22 | --master_addr ${MASTER_ADDR} \ 23 | --master_port ${MASTER_PORT}" 24 | 25 | EXP_NAME=paperowl 26 | 27 | max_length=2304 28 | micro_batch_size=1 29 | global_batch_size=256 30 | gradient_accumulation_steps=1 31 | 32 | SAVE_NAME=${ureader}_${max_length}_${global_batch_size} 33 | 34 | SAVE_PATH="./output/${EXP_NAME}/" 35 | TENSORBOARD_PATH="./tensorboard/sft/${SAVE_NAME}/" 36 | 37 | 38 | train_epochs=10 39 | train_iters=29000 40 | 41 | lr_warmup_iters=36 42 | 43 | eval_iter=290 44 | eval_interval=1160 45 | save_interval=1160 46 | 47 | mkdir -p ${SAVE_PATH} 48 | mkdir -p ${TENSORBOARD_PATH} 49 | 50 | options=" \ 51 | --pretrained-ckpt checkpoints/mplug-owl-llama-7b \ 52 | --seq-length ${max_length} \ 53 | --micro-batch-size ${micro_batch_size} \ 54 | --global-batch-size ${global_batch_size} \ 55 | --num-training-steps ${train_iters} \ 56 | --train-epochs ${train_epochs} \ 57 | --num-warmup-steps ${lr_warmup_iters} \ 58 | 
--gradient-accumulation-steps ${gradient_accumulation_steps} \ 59 | --lr 1e-4 \ 60 | --min-lr 1e-6 \ 61 | --eval-iters ${eval_iter} \ 62 | --save-interval ${save_interval} \ 63 | --save-path ${SAVE_PATH} \ 64 | --tensorboard-dir ${TENSORBOARD_PATH} \ 65 | --clip-grad 1.0 \ 66 | --weight-decay 0.0001 \ 67 | --adam-beta1 0.9 \ 68 | --adam-beta2 0.999 \ 69 | --num-workers 16 \ 70 | --use-lora \ 71 | --gradient-checkpointing \ 72 | --fp16 \ 73 | --deepspeed ds_config.json" 74 | 75 | multimodal_options=" \ 76 | --mm-config configs/sft/release.yaml 77 | " 78 | 79 | python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log -------------------------------------------------------------------------------- /PaperOwl/serve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/PaperOwl/serve/__init__.py -------------------------------------------------------------------------------- /PaperOwl/serve/gradio_css.py: -------------------------------------------------------------------------------- 1 | code_highlight_css = ( 2 | """ 3 | #chatbot .hll { background-color: #ffffcc } 4 | #chatbot .c { color: #408080; font-style: italic } 5 | #chatbot .err { border: 1px solid #FF0000 } 6 | #chatbot .k { color: #008000; font-weight: bold } 7 | #chatbot .o { color: #666666 } 8 | #chatbot .ch { color: #408080; font-style: italic } 9 | #chatbot .cm { color: #408080; font-style: italic } 10 | #chatbot .cp { color: #BC7A00 } 11 | #chatbot .cpf { color: #408080; font-style: italic } 12 | #chatbot .c1 { color: #408080; font-style: italic } 13 | #chatbot .cs { color: #408080; font-style: italic } 14 | #chatbot .gd { color: #A00000 } 15 | #chatbot .ge { font-style: italic } 16 | #chatbot .gr { color: #FF0000 } 17 | #chatbot .gh { color: #000080; font-weight: bold } 18 | #chatbot .gi { color: #00A000 } 19 | #chatbot .go { color: #888888 } 20 | #chatbot .gp { color: #000080; font-weight: bold } 21 | #chatbot .gs { font-weight: bold } 22 | #chatbot .gu { color: #800080; font-weight: bold } 23 | #chatbot .gt { color: #0044DD } 24 | #chatbot .kc { color: #008000; font-weight: bold } 25 | #chatbot .kd { color: #008000; font-weight: bold } 26 | #chatbot .kn { color: #008000; font-weight: bold } 27 | #chatbot .kp { color: #008000 } 28 | #chatbot .kr { color: #008000; font-weight: bold } 29 | #chatbot .kt { color: #B00040 } 30 | #chatbot .m { color: #666666 } 31 | #chatbot .s { color: #BA2121 } 32 | #chatbot .na { color: #7D9029 } 33 | #chatbot .nb { color: #008000 } 34 | #chatbot .nc { color: #0000FF; font-weight: bold } 35 | #chatbot .no { color: #880000 } 36 | #chatbot .nd { color: #AA22FF } 37 | #chatbot .ni { color: #999999; font-weight: bold } 38 | #chatbot .ne { color: #D2413A; font-weight: bold } 39 | #chatbot .nf { color: #0000FF } 40 | #chatbot .nl { color: #A0A000 } 41 | #chatbot .nn { color: #0000FF; font-weight: bold } 42 | #chatbot .nt { color: #008000; font-weight: bold } 43 | #chatbot .nv { color: #19177C } 44 | #chatbot .ow { color: #AA22FF; font-weight: bold } 45 | #chatbot .w { color: #bbbbbb } 46 | #chatbot .mb { color: #666666 } 47 | #chatbot .mf { color: #666666 } 48 | #chatbot .mh { color: #666666 } 49 | #chatbot .mi { color: #666666 } 50 | #chatbot .mo { color: #666666 } 51 | #chatbot .sa { color: #BA2121 } 52 | #chatbot .sb { color: #BA2121 } 53 | #chatbot .sc { color: #BA2121 } 54 | 
#chatbot .dl { color: #BA2121 } 55 | #chatbot .sd { color: #BA2121; font-style: italic } 56 | #chatbot .s2 { color: #BA2121 } 57 | #chatbot .se { color: #BB6622; font-weight: bold } 58 | #chatbot .sh { color: #BA2121 } 59 | #chatbot .si { color: #BB6688; font-weight: bold } 60 | #chatbot .sx { color: #008000 } 61 | #chatbot .sr { color: #BB6688 } 62 | #chatbot .s1 { color: #BA2121 } 63 | #chatbot .ss { color: #19177C } 64 | #chatbot .bp { color: #008000 } 65 | #chatbot .fm { color: #0000FF } 66 | #chatbot .vc { color: #19177C } 67 | #chatbot .vg { color: #19177C } 68 | #chatbot .vi { color: #19177C } 69 | #chatbot .vm { color: #19177C } 70 | #chatbot .il { color: #666666 } 71 | """) 72 | #.highlight { background: #f8f8f8; } 73 | 74 | -------------------------------------------------------------------------------- /PaperOwl/serve/model_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import re 4 | import torch 5 | import transformers 6 | import traceback 7 | 8 | from queue import Queue 9 | from threading import Thread 10 | 11 | 12 | def post_process_output(text): 13 | text = text.strip() 14 | pattern = re.compile( 15 | r"||||\[PAD\]|<\|endoftext\|>|\[UNK\]|\[CLS\]|\[MASK\]|<\|startofpiece\|>|<\|endofpiece\|>|\[gMASK\]|\[sMASK\]" 16 | ) 17 | text = pattern.sub("", text.strip()).strip() 18 | return text 19 | 20 | 21 | def post_process_code(code): 22 | sep = "\n```" 23 | if sep in code: 24 | blocks = code.split(sep) 25 | if len(blocks) % 2 == 1: 26 | for i in range(1, len(blocks), 2): 27 | blocks[i] = blocks[i].replace("\\_", "_") 28 | code = sep.join(blocks) 29 | return code 30 | 31 | 32 | class Stream(transformers.StoppingCriteria): 33 | def __init__(self, callback_func=None): 34 | self.callback_func = callback_func 35 | 36 | def __call__(self, input_ids, scores) -> bool: 37 | if self.callback_func is not None: 38 | self.callback_func(input_ids[0]) 39 | return False 40 | 41 | 42 | class Iteratorize: 43 | 44 | """ 45 | Transforms a function that takes a callback 46 | into a lazy iterator (generator). 
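    The wrapped function runs on a background thread; every value passed to the
    callback is pushed onto an internal queue and returned by __next__, and a
    sentinel object put after the function returns triggers StopIteration.
    Exiting the context manager sets stop_now so the callback aborts the
    underlying call with a ValueError.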
47 | """ 48 | 49 | def __init__(self, func, kwargs={}, callback=None): 50 | self.mfunc = func 51 | self.c_callback = callback 52 | self.q = Queue() 53 | self.sentinel = object() 54 | self.kwargs = kwargs 55 | self.stop_now = False 56 | 57 | def _callback(val): 58 | if self.stop_now: 59 | raise ValueError 60 | self.q.put(val) 61 | 62 | def gentask(): 63 | try: 64 | ret = self.mfunc(callback=_callback, **self.kwargs) 65 | except ValueError: 66 | pass 67 | except: 68 | traceback.print_exc() 69 | pass 70 | 71 | self.q.put(self.sentinel) 72 | if self.c_callback: 73 | self.c_callback(ret) 74 | 75 | self.thread = Thread(target=gentask) 76 | self.thread.start() 77 | 78 | def __iter__(self): 79 | return self 80 | 81 | def __next__(self): 82 | obj = self.q.get(True, None) 83 | if obj is self.sentinel: 84 | raise StopIteration 85 | else: 86 | return obj 87 | 88 | def __enter__(self): 89 | return self 90 | 91 | def __exit__(self, exc_type, exc_val, exc_tb): 92 | self.stop_now = True -------------------------------------------------------------------------------- /TinyChart/assets/Paper-Arxiv-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: ArxivPaperArxiv -------------------------------------------------------------------------------- /TinyChart/assets/cases.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/assets/cases.png -------------------------------------------------------------------------------- /TinyChart/assets/perform_and_speed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/assets/perform_and_speed.png -------------------------------------------------------------------------------- /TinyChart/images/albums.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/albums.png -------------------------------------------------------------------------------- /TinyChart/images/college.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/college.png -------------------------------------------------------------------------------- /TinyChart/images/diseases.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/diseases.png -------------------------------------------------------------------------------- /TinyChart/images/economy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/economy.png -------------------------------------------------------------------------------- /TinyChart/images/immigrants.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/immigrants.png -------------------------------------------------------------------------------- 
/TinyChart/images/market.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/market.png -------------------------------------------------------------------------------- /TinyChart/images/sails.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/sails.png -------------------------------------------------------------------------------- /TinyChart/images/sports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/sports.png -------------------------------------------------------------------------------- /TinyChart/images/workers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/images/workers.png -------------------------------------------------------------------------------- /TinyChart/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "tinyllava" 7 | version = "1.0.0" 8 | description = "A Framework of Small-scale Large Multimodal Models." 9 | readme = "README.md" 10 | requires-python = ">=3.9" 11 | classifiers = [ 12 | "Programming Language :: Python :: 3", 13 | "License :: OSI Approved :: Apache Software License", 14 | ] 15 | dependencies = [ 16 | "torch==2.0.1", "torchvision==0.15.2", "tiktoken", 17 | "transformers==4.37.2", "tokenizers==0.15.1", "sentencepiece==0.1.99", "shortuuid", 18 | "accelerate==0.21.0", "peft==0.4.0", "bitsandbytes==0.41.0", 19 | "pydantic<2,>=1", "markdown2[all]", "numpy", "scikit-learn==1.2.2", 20 | "gradio==3.35.2", "gradio_client==0.2.9", 21 | "requests", "httpx==0.24.0", "uvicorn", "fastapi", 22 | "einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13", 23 | ] 24 | 25 | [project.optional-dependencies] 26 | train = ["deepspeed==0.9.5", "ninja", "wandb"] 27 | 28 | [project.urls] 29 | "Homepage" = "https://github.com/X-PLUG/mPLUG-DocOwl/blob/main/TinyChart" 30 | "Bug Tracker" = "https://github.com/X-PLUG/mPLUG-DocOwl/issues" 31 | 32 | [tool.setuptools.packages.find] 33 | exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"] 34 | 35 | [tool.wheel] 36 | exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"] 37 | 38 | -------------------------------------------------------------------------------- /TinyChart/scripts/convert_model_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | def convert_config(path): 6 | config_path = path+'/config.json' 7 | config = json.load(open(config_path, 'r')) 8 | assert os.path.isdir(path+'/vision_tower') 9 | try: 10 | os.symlink(path+'/vision_tower', path+'/siglip') 11 | except: 12 | pass 13 | config['mm_vision_tower'] = path+'/siglip' 14 | json.dump(config, open(config_path, 'w'), indent=4, ensure_ascii=False) 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--input', type=str, 
required=True) 19 | 20 | args = parser.parse_args() 21 | 22 | if args.input[0] != '/': 23 | args.input = os.getcwd() + '/' + args.input 24 | 25 | if os.path.isdir(args.input+'/vision_tower'): 26 | convert_config(args.input) -------------------------------------------------------------------------------- /TinyChart/scripts/evaluate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Variables 3 | MODEL_PATH=$1 4 | TEST_DATA_PATH=$2 5 | 6 | OUTPUT=${MODEL_PATH}/eval 7 | mkdir -p ${OUTPUT} 8 | cp scripts/evaluate.sh ${OUTPUT}/ 9 | 10 | 11 | export PYTHONPATH=./ 12 | export PYTHONHASHSEED=42 13 | export PYTHONUNBUFFERED=1 14 | 15 | num_chunks=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l) 16 | time_stamp=$(date +%Y%m%d-%H%M%S) 17 | TEMP_DIR=${OUTPUT}/temp_${time_stamp} 18 | mkdir -p ${TEMP_DIR} 19 | 20 | for ((chunk_idx=0; chunk_idx&1 | tee -a ${OUTPUT}/log.txt & 29 | done 30 | wait 31 | 32 | # Merge split && divide by dataset && calculate metric 33 | python scripts/merge_jsonl_sort.py \ 34 | --input ${TEMP_DIR} \ 35 | --output ${TEMP_DIR}/all.jsonl 36 | python scripts/split_jsonl_dataset.py \ 37 | --input ${TEMP_DIR}/all.jsonl \ 38 | --output ${OUTPUT} 39 | python tinychart/eval/run_eval.py \ 40 | --input ${OUTPUT} 41 | -------------------------------------------------------------------------------- /TinyChart/scripts/merge_jsonl_sort.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | 5 | def read_jsonl(jsonl_path): 6 | with open(jsonl_path, 'r') as f: 7 | data = [json.loads(line) for line in f] 8 | return data 9 | 10 | def write_jsonl(data, jsonl_path): 11 | with open(jsonl_path, 'w', encoding='utf-8') as f: 12 | for item in data: 13 | f.write(json.dumps(item) + '\n') 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--input', default='temp/') 18 | parser.add_argument('--output', default='chartqa_val.json') 19 | 20 | args = parser.parse_args() 21 | files = os.listdir(args.input) 22 | files.sort() 23 | data = [] 24 | for file in files: 25 | if file != 'all.jsonl': 26 | data.extend(read_jsonl(os.path.join(args.input, file))) 27 | # data.sort(key=lambda x: int(x['id'].split('_')[-1])) 28 | write_jsonl(data, args.output) 29 | -------------------------------------------------------------------------------- /TinyChart/scripts/split_jsonl_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import argparse 4 | from collections import defaultdict 5 | 6 | def read_jsonl(jsonl_path): 7 | with open(jsonl_path, 'r') as f: 8 | data = [json.loads(line) for line in f] 9 | return data 10 | 11 | def write_jsonl(data, jsonl_path): 12 | with open(jsonl_path, 'w', encoding='utf-8') as f: 13 | for item in data: 14 | f.write(json.dumps(item) + '\n') 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--input', default='all.json') 19 | parser.add_argument('--output', default='./output/') 20 | 21 | args = parser.parse_args() 22 | 23 | all_data = read_jsonl(args.input) 24 | 25 | dataset2jsonl = defaultdict(list) 26 | 27 | for item in all_data: 28 | int_id = item['id'].split('_')[-1] 29 | dataset_name_split = '_'.join(item['id'].split('_')[:-1]) 30 | 31 | if '-two_col-' in dataset_name_split: 32 | dataset_name_split = dataset_name_split.replace('-two_col-', '-') 33 | if '-multi_col-' 
in dataset_name_split: 34 | dataset_name_split = dataset_name_split.replace('-multi_col-', '-') 35 | 36 | dataset2jsonl[dataset_name_split].append(item) 37 | 38 | for dataset_name_split, data in dataset2jsonl.items(): 39 | data.sort(key=lambda x: int(x['id'].split('_')[-1])) 40 | write_jsonl(data, os.path.join(args.output, f'{dataset_name_split}.jsonl')) -------------------------------------------------------------------------------- /TinyChart/scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TRAIN_DATA=data/train.json 3 | TEST_DATA=data/test.json 4 | 5 | LLM_PATH=bczhou/TinyLLaVA-3.1B 6 | VIT_PATH=pretrained_models/TinyLLaVA-3.1B-SigLIP 7 | 8 | # # If you want to fine-tune TinyChart-3B-768: 9 | # LLM_PATH=mPLUG/TinyChart-3B-768 10 | # VIT_PATH=mPLUG/TinyChart-3B-768-siglip 11 | 12 | OUTPUT=./checkpoints/TinyChart-3B 13 | 14 | mkdir -p ${OUTPUT} 15 | # Copy the script to OUTPUT directory 16 | cp scripts/train.sh ${OUTPUT}/ 17 | 18 | export PYTHONPATH=./ 19 | 20 | if [ $MASTER_ADDR ];then 21 | echo $MASTER_ADDR 22 | echo $MASTER_PORT 23 | echo $WORLD_SIZE 24 | echo $RANK 25 | else 26 | MASTER_ADDR=127.0.0.1 27 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15 28 | WORLD_SIZE=1 29 | RANK=0 30 | fi 31 | # Change for multinode config 32 | NNODES=${WORLD_SIZE} 33 | NODE_RANK=${RANK} 34 | GPUS_PER_NODE=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) 35 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 36 | 37 | torchrun $DISTRIBUTED_ARGS \ 38 | tinychart/train/train.py \ 39 | --lora_enable False \ 40 | --tune_vision_tower True \ 41 | --tune_entire_model True \ 42 | --tune_vit_from_layer -1 \ 43 | --deepspeed scripts/zero3_offload_decay.json \ 44 | --model_name_or_path ${LLM_PATH} \ 45 | --vision_tower ${VIT_PATH} \ 46 | --version v1 \ 47 | --data_path ${TRAIN_DATA} \ 48 | --image_folder '' \ 49 | --mm_projector_type mlp2x_gelu \ 50 | --mm_vision_select_layer -2 \ 51 | --mm_use_im_start_end False \ 52 | --mm_use_im_patch_token False \ 53 | --image_aspect_ratio pad \ 54 | --group_by_modality_length True \ 55 | --fp16 True \ 56 | --bf16 False \ 57 | --output_dir ${OUTPUT} \ 58 | --num_train_epochs 3 \ 59 | --per_device_train_batch_size 8 \ 60 | --per_device_eval_batch_size 4 \ 61 | --gradient_accumulation_steps 2 \ 62 | --evaluation_strategy "no" \ 63 | --save_strategy "steps" \ 64 | --save_steps 1000 \ 65 | --save_total_limit 10 \ 66 | --learning_rate 1e-4 \ 67 | --weight_decay 0. 
\ 68 | --warmup_ratio 0.03 \ 69 | --lr_scheduler_type "cosine" \ 70 | --logging_steps 1 \ 71 | --tf32 False \ 72 | --model_max_length 1024 \ 73 | --gradient_checkpointing True \ 74 | --dataloader_num_workers 4 \ 75 | --lazy_preprocess True \ 76 | --report_to tensorboard \ 77 | 2>&1 | tee -a ${OUTPUT}/log.${RANK}.txt 78 | 79 | # Evaluate 80 | if [ $RANK -eq 0 ]; then 81 | python scripts/convert_model_config.py --input ${OUTPUT} 82 | bash scripts/evaluate.sh ${OUTPUT} ${TEST_DATA} 83 | fi -------------------------------------------------------------------------------- /TinyChart/scripts/vit_add_tome.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | 4 | def read_json(data_path): 5 | with open(data_path, 'r', encoding='utf-8') as f: 6 | data = json.load(f) 7 | return data 8 | 9 | def write_json(data, data_path): 10 | with open(data_path, 'w', encoding='utf-8') as f: 11 | json.dump(data, f, indent=4, ensure_ascii=False) 12 | return 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--path', type=str) 17 | parser.add_argument('--image_size', type=int, default=768) 18 | parser.add_argument('--tome_r', type=int, default=84) 19 | 20 | args = parser.parse_args() 21 | 22 | config = read_json(args.path+'/config.json') 23 | config['use_tome'] = True 24 | config['image_size'] = args.image_size 25 | config['tome_r'] = args.tome_r 26 | write_json(config, args.path+'/config.json') 27 | 28 | -------------------------------------------------------------------------------- /TinyChart/scripts/zero3_offload_decay.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "zero_optimization": { 14 | "stage": 3, 15 | "offload_optimizer": { 16 | "device": "cpu", 17 | "pin_memory": true 18 | }, 19 | "offload_param": { 20 | "device": "cpu", 21 | "pin_memory": true 22 | }, 23 | "overlap_comm": true, 24 | "contiguous_gradients": true, 25 | "sub_group_size": 1e9, 26 | "reduce_bucket_size": "auto", 27 | "stage3_prefetch_bucket_size": "auto", 28 | "stage3_param_persistence_threshold": "auto", 29 | "stage3_max_live_parameters": 1e9, 30 | "stage3_max_reuse_distance": 1e9, 31 | "gather_16bit_weights_on_model_save": true 32 | }, 33 | "gradient_accumulation_steps": "auto", 34 | "gradient_clipping": "auto", 35 | "train_batch_size": "auto", 36 | "train_micro_batch_size_per_gpu": "auto", 37 | "steps_per_print": 1e5, 38 | "wall_clock_breakdown": false 39 | } -------------------------------------------------------------------------------- /TinyChart/tinychart/__init__.py: -------------------------------------------------------------------------------- 1 | from tinychart.model import * 2 | -------------------------------------------------------------------------------- /TinyChart/tinychart/constants.py: -------------------------------------------------------------------------------- 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 2 | WORKER_HEART_BEAT_INTERVAL = 15 3 | 4 | LOGDIR = "." 
5 | 6 | # Model Constants 7 | IGNORE_INDEX = -100 8 | IMAGE_TOKEN_INDEX = -200 9 | DEFAULT_IMAGE_TOKEN = "" 10 | DEFAULT_IMAGE_PATCH_TOKEN = "" 11 | DEFAULT_IM_START_TOKEN = "" 12 | DEFAULT_IM_END_TOKEN = "" 13 | IMAGE_PLACEHOLDER = "" 14 | -------------------------------------------------------------------------------- /TinyChart/tinychart/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/data/__init__.py -------------------------------------------------------------------------------- /TinyChart/tinychart/data/preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/data/preprocess/__init__.py -------------------------------------------------------------------------------- /TinyChart/tinychart/data/process.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib 3 | from typing import Dict, Optional, Sequence, List 4 | 5 | import transformers 6 | 7 | from tinychart.constants import DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN 8 | from tinychart import conversation as conversation_lib 9 | from tinychart.arguments import * 10 | 11 | PREPROCESS_REGISTRY = {} 12 | 13 | def register_preprocess(name): 14 | def register_preprocess_cls(cls): 15 | if name in PREPROCESS_REGISTRY: 16 | return PREPROCESS_REGISTRY[name] 17 | 18 | PREPROCESS_REGISTRY[name] = cls 19 | return cls 20 | 21 | return register_preprocess_cls 22 | 23 | 24 | def import_modules(modules_dir, namespace): 25 | for file in os.listdir(modules_dir): 26 | path = os.path.join(modules_dir, file) 27 | 28 | if ( 29 | not file.startswith("_") 30 | and not file.startswith(".") 31 | and (file.endswith(".py") or os.path.isdir(path)) 32 | ): 33 | module_name = file[: file.find(".py")] if file.endswith(".py") else file 34 | importlib.import_module(namespace + "." 
+ module_name) 35 | 36 | models_dir = os.path.join(os.path.dirname(__file__), 'preprocess') 37 | import_modules(models_dir, "tinychart.data.preprocess") 38 | 39 | 40 | def PreprocessSelect(version): 41 | result = PREPROCESS_REGISTRY.get(version, None) 42 | if result is None: 43 | for name in PREPROCESS_REGISTRY.keys(): 44 | if version in name: 45 | result = PREPROCESS_REGISTRY[name] 46 | break 47 | if result is None: 48 | result = PREPROCESS_REGISTRY['default'] 49 | return result 50 | 51 | 52 | 53 | def preprocess_multimodal( 54 | sources: Sequence[str], 55 | data_args: DataArguments 56 | ) -> Dict: 57 | is_multimodal = data_args.is_multimodal 58 | if not is_multimodal: 59 | return sources 60 | 61 | for source in sources: 62 | for sentence in source: 63 | if DEFAULT_IMAGE_TOKEN in sentence['value']: 64 | sentence['value'] = sentence['value'].replace(DEFAULT_IMAGE_TOKEN, '').strip() 65 | sentence['value'] = DEFAULT_IMAGE_TOKEN + '\n' + sentence['value'] 66 | sentence['value'] = sentence['value'].strip() 67 | if "mmtag" in conversation_lib.default_conversation.version: 68 | sentence['value'] = sentence['value'].replace(DEFAULT_IMAGE_TOKEN, 69 | '' + DEFAULT_IMAGE_TOKEN + '') 70 | replace_token = DEFAULT_IMAGE_TOKEN 71 | if data_args.mm_use_im_start_end: 72 | replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN 73 | sentence["value"] = sentence["value"].replace(DEFAULT_IMAGE_TOKEN, replace_token) 74 | 75 | return sources 76 | 77 | 78 | def preprocess( 79 | sources: Sequence[str], 80 | tokenizer: transformers.PreTrainedTokenizer, 81 | has_image: bool = False 82 | ) -> Dict: 83 | return PreprocessSelect(conversation_lib.default_conversation.version)(sources, tokenizer, has_image) 84 | -------------------------------------------------------------------------------- /TinyChart/tinychart/eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/eval/__init__.py -------------------------------------------------------------------------------- /TinyChart/tinychart/eval/eval_chart2text.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | import sacrebleu 5 | import numpy as np 6 | 7 | def chart2text_evaluator(data, temp_dir='/output/temp'): 8 | if temp_dir[-1] == '/': 9 | temp_dir = temp_dir[:-1] 10 | cands = [] 11 | refs = [] 12 | for item in data: 13 | cands.append(item['model_answer']) 14 | refs.append(item['gt_answer']) 15 | 16 | bleu = sacrebleu.corpus_bleu(cands, [refs], lowercase=True).score 17 | 18 | return bleu -------------------------------------------------------------------------------- /TinyChart/tinychart/model/__init__.py: -------------------------------------------------------------------------------- 1 | from tinychart.model.language_model.llava_phi import TinyChartPhiForCausalLM, TinyChartPhiConfig -------------------------------------------------------------------------------- /TinyChart/tinychart/model/language_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/model/language_model/__init__.py -------------------------------------------------------------------------------- /TinyChart/tinychart/model/model_factory.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import importlib 3 | 4 | MODEL_REGISTRY = {} 5 | TOKENIZER_REGISTRY = {} 6 | 7 | 8 | def ModelSelect(model_name_or_path): 9 | model = None 10 | for name in MODEL_REGISTRY.keys(): 11 | if name in model_name_or_path.lower(): 12 | model = MODEL_REGISTRY[name] 13 | if model is None: 14 | model = MODEL_REGISTRY['llama'] 15 | return model 16 | 17 | 18 | def TokenizerSelect(model_name_or_path): 19 | tokenizer_init = None 20 | for name in TOKENIZER_REGISTRY.keys(): 21 | if name in model_name_or_path.lower(): 22 | tokenizer_init = TOKENIZER_REGISTRY[name] 23 | if tokenizer_init is None: 24 | tokenizer_init = TOKENIZER_REGISTRY['llama'] 25 | return tokenizer_init 26 | 27 | 28 | def register_model(name): 29 | def register_model_cls(cls): 30 | if name in MODEL_REGISTRY: 31 | return MODEL_REGISTRY[name] 32 | 33 | MODEL_REGISTRY[name] = cls 34 | return cls 35 | 36 | return register_model_cls 37 | 38 | 39 | def register_tokenizer(name): 40 | def register_tokenizer_cls(cls): 41 | if name in TOKENIZER_REGISTRY: 42 | return TOKENIZER_REGISTRY[name] 43 | 44 | TOKENIZER_REGISTRY[name] = cls 45 | return cls 46 | 47 | return register_tokenizer_cls 48 | 49 | 50 | def import_models(models_dir, namespace): 51 | for file in os.listdir(models_dir): 52 | path = os.path.join(models_dir, file) 53 | if ( 54 | not file.startswith("_") 55 | and not file.startswith(".") 56 | and file.endswith(".py") 57 | ): 58 | model_name = file[: file.find(".py")] if file.endswith(".py") else file 59 | importlib.import_module(namespace + "." + model_name) 60 | 61 | 62 | # automatically import any Python files in the models/ directory 63 | models_dir = os.path.join(os.path.dirname(__file__), 'language_model') 64 | import_models(models_dir, "tinychart.model.language_model") 65 | -------------------------------------------------------------------------------- /TinyChart/tinychart/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tinychart.model.multimodal_encoder.siglip_encoder import SigLipVisionTower 3 | 4 | def build_vision_tower(vision_tower_cfg, **kwargs): 5 | vision_tower = getattr(vision_tower_cfg, 'mm_vision_tower', getattr(vision_tower_cfg, 'vision_tower', None)) 6 | is_absolute_path_exists = os.path.exists(vision_tower) 7 | return SigLipVisionTower(vision_tower, vision_tower_cfg, **kwargs) -------------------------------------------------------------------------------- /TinyChart/tinychart/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/TinyChart/tinychart/train/__init__.py -------------------------------------------------------------------------------- /UReader/.gitattributes: -------------------------------------------------------------------------------- 1 | *.py eol=lf 2 | *.rst eol=lf 3 | *.md eol=lf 4 | *.mdx eol=lf -------------------------------------------------------------------------------- /UReader/.gitignore: -------------------------------------------------------------------------------- 1 | evaluate_results* 2 | checkpoints/ 3 | benchmark_files/ 4 | ureader_images 5 | ureader_json 6 | ureader_images/ 7 | ureader_json/ 8 | # Initially taken from Github's Python gitignore file 9 | tensorboard/* 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 
14 | .ossutil_* 15 | # C extensions 16 | *.so 17 | 18 | # tests and logs 19 | tests/fixtures/cached_*_text.txt 20 | logs/ 21 | lightning_logs/ 22 | lang_code_data/ 23 | 24 | # Distribution / packaging 25 | .Python 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | MANIFEST 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | .python-version 97 | 98 | # celery beat schedule file 99 | celerybeat-schedule 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # vscode 132 | .vs 133 | .vscode 134 | 135 | # Pycharm 136 | .idea 137 | 138 | # TF code 139 | tensorflow_code 140 | 141 | # Models 142 | proc_data 143 | 144 | # examples 145 | runs 146 | /runs_old 147 | /wandb 148 | /output 149 | /configs_dev 150 | /scripts_dev 151 | # /examples/runs 152 | # /examples/**/*.args 153 | # /examples/rag/sweep 154 | 155 | # data 156 | /data 157 | serialization_dir 158 | 159 | # emacs 160 | *.*~ 161 | debug.env 162 | 163 | # vim 164 | .*.swp 165 | 166 | #ctags 167 | tags 168 | 169 | # pre-commit 170 | .pre-commit* 171 | 172 | # .lock 173 | *.lock 174 | 175 | # DS_Store (MacOS) 176 | .DS_Store 177 | 178 | # ruff 179 | .ruff_cache 180 | -------------------------------------------------------------------------------- /UReader/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | # import wget 3 | # resources = os.getenv('resources_new') 4 | # resources_filename = wget.download(resources) 5 | 6 | # os.system('tar zxvf {}'.format(resources_filename)) 7 | 8 | # os.system('ls -l') 9 | 10 | import argparse 11 | import datetime 12 | import json 13 | import os 14 | import time 15 | import torch 16 | 17 | import gradio as gr 18 | import requests 19 | from pipeline.utils import add_config_args, set_args 20 | from sconf import Config 21 | 22 | 23 | if __name__ == "__main__": 24 | from serve.serve_utils import init 25 | io = init() 26 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 27 | log_dir = cur_dir[:-9] + "log" 28 | 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument("--host", type=str, default="0.0.0.0") 
31 | parser.add_argument("--debug", action="store_true", help="using debug mode") 32 | parser.add_argument("--port", type=int) 33 | parser.add_argument("--concurrency-count", type=int, default=100) 34 | parser.add_argument("--base-model",type=str, default='checkpoints/ureader') 35 | parser.add_argument("--load-8bit", action="store_true", help="using 8bit mode") 36 | parser.add_argument("--bf16", action="store_true", help="using 8bit mode") 37 | parser.add_argument("--mm_config", type=str, default='configs/sft/release.yaml') 38 | args = parser.parse_args() 39 | config = Config(args.mm_config) 40 | add_config_args(config, args) 41 | set_args(args) 42 | if torch.cuda.is_available(): 43 | device = "cuda" 44 | else: 45 | device = "cpu" 46 | from serve.web_server import mPLUG_Owl_Server, build_demo 47 | model = mPLUG_Owl_Server( 48 | base_model=args.base_model, 49 | log_dir=log_dir, 50 | load_in_8bit=args.load_8bit, 51 | bf16=args.bf16, 52 | device=device, 53 | io=io, 54 | config=config 55 | ) 56 | demo = build_demo(model) 57 | demo.queue(concurrency_count=args.concurrency_count, status_update_rate=10, api_open=False).launch(server_name=args.host, debug=args.debug, server_port=args.port, share=False) -------------------------------------------------------------------------------- /UReader/assets/-twitter-blue.svg: -------------------------------------------------------------------------------- 1 | twittertwitter -------------------------------------------------------------------------------- /UReader/assets/Demo-ModelScope-brightgreen.svg: -------------------------------------------------------------------------------- 1 | Demo: ModelScopeDemoModelScope -------------------------------------------------------------------------------- /UReader/assets/LICENSE-Apache License-blue.svg: -------------------------------------------------------------------------------- 1 | LICENSE: Apache LicenseLICENSEApache License -------------------------------------------------------------------------------- /UReader/assets/Paper-Arxiv-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: ArxivPaperArxiv -------------------------------------------------------------------------------- /UReader/assets/Paper-PDF-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: PDFPaperPDF -------------------------------------------------------------------------------- /UReader/assets/intro_case.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/assets/intro_case.jpg -------------------------------------------------------------------------------- /UReader/assets/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/assets/model.png -------------------------------------------------------------------------------- /UReader/assets/modelscope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/assets/modelscope.png -------------------------------------------------------------------------------- /UReader/assets/modelscopeIcon.svg: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /UReader/configs/sft/release.yaml: -------------------------------------------------------------------------------- 1 | # [ [1, 1], 2 | # [1, 2], [2, 1], 3 | # [3, 1], [1, 3], 4 | # [2, 2], [4, 1], [1, 4], 5 | # [5, 1], [1, 5], 6 | # [1, 6], [6, 1], 7 | # [7, 1], [1, 7], 8 | # [4, 2], [2, 4], [1, 8], [8, 1], 9 | # [1, 9], [3, 3], [9, 1], 10 | # [6, 2], [2, 6], 11 | # [2, 8], [8, 2], [4, 4], 12 | # [3, 6], [6, 3], 13 | # [10, 2], [2, 10], 14 | # [12, 2], [2, 12], 15 | # [5, 5], 16 | # [3, 9], [9, 3], 17 | # [2, 14], [14, 2], 18 | # [4, 8], [2, 16], [8, 4], [16, 2], 19 | # [12, 3], [3, 12], [18, 2], [2, 18], [6, 6]] 20 | train_processors: { 21 | sft: {type: 'DocNewMultiScaleSFTProcessor', image_size: 224, 22 | anchors: [[1, 1], [1, 2], [2, 1], [1, 3], [3, 1], [1, 4], [2, 2], [4, 1], [1, 5], [5, 1], [1, 6], [2, 3], [3, 2], [6, 1], [1, 7], [7, 1], [1, 8], [2, 4], [4, 2], [8, 1], [1, 9], [3, 3], [9, 1], [1, 10], [2, 5], [5, 2], [10, 1], [1, 11], [11, 1], [2, 6], [3, 4], [4, 3], [6, 2], [2, 7], [7, 2], [3, 5], [5, 3], [2, 8], [4, 4], [8, 2], [2, 9], [3, 6], [6, 3], [9, 2], [2, 10], [4, 5], [5, 4], [10, 2]]} 23 | } 24 | 25 | valid_processors: { 26 | sft: {type: 'DocNewMultiScaleSFTProcessor', image_size: 224, 27 | anchors: [[1, 1], [1, 2], [2, 1], [1, 3], [3, 1], [1, 4], [2, 2], [4, 1], [1, 5], [5, 1], [1, 6], [2, 3], [3, 2], [6, 1], [1, 7], [7, 1], [1, 8], [2, 4], [4, 2], [8, 1], [1, 9], [3, 3], [9, 1], [1, 10], [2, 5], [5, 2], [10, 1], [1, 11], [11, 1], [2, 6], [3, 4], [4, 3], [6, 2], [2, 7], [7, 2], [3, 5], [5, 3], [2, 8], [4, 4], [8, 2], [2, 9], [3, 6], [6, 3], [9, 2], [2, 10], [4, 5], [5, 4], [10, 2]]} 28 | } 29 | 30 | data_files: [ 31 | 'ureader_json/train.jsonl', 32 | 'ureader_json/val.jsonl' 33 | ] 34 | # 654840 35 | 36 | patch_pos_embed_type: post 37 | 38 | -------------------------------------------------------------------------------- /UReader/ds_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "zero_optimization": { 14 | "stage": 1 15 | }, 16 | "train_batch_size": "auto", 17 | "train_micro_batch_size_per_gpu": "auto" 18 | } -------------------------------------------------------------------------------- /UReader/examples/Yao_Ming.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/Yao_Ming.jpeg -------------------------------------------------------------------------------- /UReader/examples/ca.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/ca.jpeg -------------------------------------------------------------------------------- /UReader/examples/docowl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/docowl.jpg -------------------------------------------------------------------------------- /UReader/examples/fridge.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/fridge.jpg -------------------------------------------------------------------------------- /UReader/examples/fruits.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/fruits.jpg -------------------------------------------------------------------------------- /UReader/examples/laundry.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/laundry.jpeg -------------------------------------------------------------------------------- /UReader/examples/monalisa-fun.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/monalisa-fun.jpg -------------------------------------------------------------------------------- /UReader/examples/monday.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/monday.jpg -------------------------------------------------------------------------------- /UReader/examples/mug_ad.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/mug_ad.jpeg -------------------------------------------------------------------------------- /UReader/examples/owl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/owl.jpg -------------------------------------------------------------------------------- /UReader/examples/rap.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/rap.jpeg -------------------------------------------------------------------------------- /UReader/examples/table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/table.jpg -------------------------------------------------------------------------------- /UReader/examples/titanic.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/titanic.jpeg -------------------------------------------------------------------------------- /UReader/examples/vga.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/vga.jpeg -------------------------------------------------------------------------------- /UReader/examples/website.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/examples/website.jpg -------------------------------------------------------------------------------- /UReader/mplug_owl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from transformers.utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available 17 | 18 | 19 | _import_structure = { 20 | "configuration_mplug_owl": ["MPLUG_OWL_PRETRAINED_CONFIG_ARCHIVE_MAP", "MplugOwlConfig"], 21 | "processing_mplug_owl": ["MplugOwlImageProcessor", "MplugOwlProcessor"], 22 | "tokenization_mplug_owl": ["MplugOwlTokenizer"], 23 | } 24 | 25 | try: 26 | if not is_tokenizers_available(): 27 | raise OptionalDependencyNotAvailable() 28 | except OptionalDependencyNotAvailable: 29 | pass 30 | 31 | 32 | try: 33 | if not is_torch_available(): 34 | raise OptionalDependencyNotAvailable() 35 | except OptionalDependencyNotAvailable: 36 | pass 37 | else: 38 | _import_structure["modeling_mplug_owl"] = [ 39 | "MPLUG_OWL_PRETRAINED_MODEL_ARCHIVE_LIST", 40 | "MplugOwlForConditionalGeneration", 41 | "MplugOwlModel", 42 | ] 43 | 44 | 45 | if TYPE_CHECKING: 46 | from .configuration_mplug_owl import MPLUG_OWL_PRETRAINED_CONFIG_ARCHIVE_MAP, MplugOwlConfig 47 | from .tokenization_mplug_owl import MplugOwlTokenizer 48 | 49 | try: 50 | if not is_tokenizers_available(): 51 | raise OptionalDependencyNotAvailable() 52 | except OptionalDependencyNotAvailable: 53 | pass 54 | 55 | try: 56 | if not is_torch_available(): 57 | raise OptionalDependencyNotAvailable() 58 | except OptionalDependencyNotAvailable: 59 | pass 60 | else: 61 | from .modeling_mplug_owl import ( 62 | MPLUG_OWL_PRETRAINED_MODEL_ARCHIVE_LIST, 63 | MplugOwlForConditionalGeneration, 64 | MplugOwlModel, 65 | MplugOwlPreTrainedModel, 66 | ) 67 | 68 | 69 | else: 70 | import sys 71 | 72 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 73 | 74 | from .configuration_mplug_owl import * 75 | from .modeling_mplug_owl import * 76 | from .processing_mplug_owl import * 77 | from .tokenization_mplug_owl import * 78 | -------------------------------------------------------------------------------- /UReader/mplug_owl/tokenization_mplug_owl.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 x-plug and The HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Tokenization classes for MplugOwl.""" 16 | 17 | from transformers.utils import logging 18 | from transformers.models.llama.tokenization_llama import LlamaTokenizer 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} 24 | 25 | PRETRAINED_VOCAB_FILES_MAP = { 26 | "vocab_file": { 27 | "MAGAer13/mplug-owl-llama-7b": "https://huggingface.co/MAGAer13/mplug-owl-llama-7b/resolve/main/vocab.txt", 28 | }, 29 | } 30 | 31 | PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 32 | "MAGAer13/mplug-owl-llama-7b": 1024, 33 | } 34 | 35 | 36 | class MplugOwlTokenizer(LlamaTokenizer): 37 | def __init__( 38 | self, 39 | vocab_file, 40 | unk_token="<unk>", 41 | bos_token="<s>", 42 | eos_token="</s>", 43 | pad_token="<unk>", 44 | sp_model_kwargs=None, 45 | add_bos_token=False, 46 | add_eos_token=False, 47 | clean_up_tokenization_spaces=False, 48 | **kwargs, 49 | ): 50 | super().__init__( 51 | vocab_file, 52 | unk_token, 53 | bos_token, 54 | eos_token, 55 | pad_token, 56 | sp_model_kwargs, 57 | add_bos_token, 58 | add_eos_token, 59 | clean_up_tokenization_spaces, 60 | **kwargs, 61 | ) 62 | self.eod_id = self.eos_token_id 63 | -------------------------------------------------------------------------------- /UReader/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/pipeline/__init__.py -------------------------------------------------------------------------------- /UReader/pipeline/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .processors.builder import build_processors 2 | from .xgpt3_dataset import MultiModalDataset 3 | 4 | def train_valid_test_datasets_provider(data_path, config, tokenizer, seq_length=1024,image_root='ureader_images'): 5 | """Build train and valid datasets.""" 6 | print('> building train and validation datasets for mPLUG-Owl ...') 7 | train_ds, valid_ds = build_train_valid_test_datasets( 8 | input_file=data_path, 9 | tokenizer=tokenizer, 10 | max_length=seq_length, 11 | config=config, 12 | image_root=image_root) 13 | print("> finished creating mPLUG-Owl datasets ...") 14 | 15 | return train_ds, valid_ds 16 | 17 | def build_train_valid_test_datasets(input_file, tokenizer, max_length=80, config=None,image_root='ureader_images'): 18 | train_processors = build_processors(config['train_processors']) 19 | valid_processors = build_processors(config['valid_processors']) 20 | if isinstance(input_file, dict): 21 | train_ds = MultiModalDataset(input_file['train'][0], tokenizer, train_processors, max_length, image_root=image_root) 22 | valid_ds = {name: MultiModalDataset(ds, tokenizer, valid_processors, max_length) for name,ds in input_file['valid'].items()} 23 | test_ds = None 24 | 25 | else: 26 | assert len(input_file) == 2 # If you have more than 2 files, modify the code here or merge them into train and dev 27 | train_ds = MultiModalDataset(input_file[0], tokenizer, train_processors, max_length) 28 | 
valid_ds = MultiModalDataset(input_file[1], tokenizer, valid_processors, max_length) 29 | test_ds = None 30 | return (train_ds, valid_ds) 31 | -------------------------------------------------------------------------------- /UReader/pipeline/data_utils/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Alibaba. All rights reserved. 2 | from .builder import PROCESSORS, build_processors 3 | from .default_processor import DefaultProcessor 4 | from .caption_processor import CaptionProcessor 5 | from .doc_processor import DocPretrainProcessor, DocSFTProcessor 6 | __all__ = [ 7 | 'PROCESSORS', 'build_processors', 8 | 'DefaultProcessor', 'CaptionProcessor', 9 | 'DocPretrainProcessor', 'DocSFTProcessor' 10 | ] -------------------------------------------------------------------------------- /UReader/pipeline/data_utils/processors/builder.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from icecream import ic 4 | from pipeline.data_utils.registry import Registry, build_from_cfg 5 | # from .data_utils.registry import Registry, build_from_cfg 6 | 7 | PROCESSORS = Registry('processors') 8 | 9 | def build_processors(processors_cfg): 10 | processors = dict() 11 | for task, processor in processors_cfg.items(): 12 | processors[task] = build_from_cfg(processor, PROCESSORS) 13 | ic(type(processors[task])) 14 | return processors 15 | -------------------------------------------------------------------------------- /UReader/pipeline/data_utils/processors/caption_processor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | from PIL import Image 4 | import random 5 | 6 | from pipeline.data_utils.randaugment import RandomAugment 7 | from .builder import PROCESSORS 8 | 9 | 10 | @PROCESSORS.register_module() 11 | class CaptionProcessor: 12 | def __init__(self, image_size=224, min_scale = 0.5, randaug=False): 13 | self.image_size = image_size 14 | self.min_scale = min_scale 15 | 16 | if randaug: 17 | self.image_transform = transforms.Compose([ 18 | transforms.RandomResizedCrop(image_size,scale=(min_scale, 1.0), interpolation=Image.BICUBIC), 19 | transforms.RandomHorizontalFlip(), 20 | RandomAugment(2,7,isPIL=True,augs=['Identity','AutoContrast','Equalize','Brightness','Sharpness', 21 | 'ShearX', 'ShearY', 'TranslateX', 'TranslateY', 'Rotate']), 22 | transforms.ToTensor(), 23 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 24 | ]) 25 | else: 26 | self.image_transform = transforms.Compose([ 27 | transforms.RandomResizedCrop(image_size,scale=(min_scale, 1.0), interpolation=Image.BICUBIC), 28 | transforms.RandomHorizontalFlip(), 29 | transforms.ToTensor(), 30 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 31 | ]) 32 | self.text_transform = None 33 | 34 | def __call__(self, image, text): 35 | assert image or text 36 | 37 | if image: 38 | image_input = self.image_transform(image) 39 | else: 40 | image_input = None 41 | 42 | if text: 43 | if isinstance(text["prompt"], list): 44 | prompt = random.choice(text["prompt"]) 45 | else: 46 | prompt = text["prompt"] 47 | text_input = dict( 48 | prompt=prompt, 49 | completion=text["text"], 50 | ) 51 | else: 52 | text_input = None 53 | return image_input, text_input 
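The CaptionProcessor above, together with builder.py, shows how UReader instantiates data processors through a registry. Below is a minimal usage sketch, assuming the script is run from the UReader directory with that directory on PYTHONPATH (as the training scripts arrange) and assuming an mmcv-style config dict whose 'type' field names a registered class; registry.py is not reproduced in this dump, so that key convention is an assumption. The 'sft' task name matches the one used later in pipeline/interface.py.

from PIL import Image

from pipeline.data_utils.processors.builder import build_processors

# Hypothetical config: one entry per task; 'type' is assumed to name a class
# registered in PROCESSORS via @PROCESSORS.register_module().
processors_cfg = {
    'sft': {'type': 'CaptionProcessor', 'image_size': 224, 'randaug': True},
}
processors = build_processors(processors_cfg)

image = Image.open('examples/fruits.jpg').convert('RGB')
text = {'prompt': ['Describe the image.'], 'text': 'A basket of fruits.'}
image_input, text_input = processors['sft'](image, text)
# image_input: normalized 3x224x224 tensor; text_input: {'prompt': ..., 'completion': ...}

This mirrors what get_model in pipeline/interface.py does when it calls build_processors(config['valid_processors'])['sft'] with the release.yaml config.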
-------------------------------------------------------------------------------- /UReader/pipeline/data_utils/processors/default_processor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | from PIL import Image 4 | import random 5 | 6 | from pipeline.data_utils.randaugment import RandomAugment 7 | from .builder import PROCESSORS 8 | 9 | 10 | @PROCESSORS.register_module() 11 | class DefaultProcessor: 12 | def __init__(self, image_size=224): 13 | self.image_size = image_size 14 | 15 | self.image_transform = transforms.Compose([ 16 | transforms.Resize((image_size, image_size),interpolation=Image.BICUBIC), 17 | transforms.ToTensor(), 18 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 19 | ]) 20 | 21 | self.text_transform = None 22 | 23 | def __call__(self, image, text): 24 | assert image or text 25 | 26 | if image: 27 | image_input = self.image_transform(image) 28 | else: 29 | image_input = None 30 | 31 | if text: 32 | if isinstance(text["prompt"], list): 33 | prompt = random.choice(text["prompt"]) 34 | else: 35 | prompt = text["prompt"] 36 | text_input = dict( 37 | prompt=prompt, 38 | completion=text["text"], 39 | ) 40 | else: 41 | text_input = None 42 | return image_input, text_input -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/pipeline/eval_utils/__init__.py -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from .__main__ import cli_main 2 | from .due_evaluator import DueEvaluator 3 | 4 | __all__ = ['DueEvaluator', 'cli_main'] 5 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import sys 6 | from typing import Optional, Set 7 | import json 8 | 9 | from .due_evaluator import DueEvaluator 10 | from .utils import property_scores_to_string 11 | 12 | 13 | def parse_args(): 14 | """Parse CLI arguments. 15 | 16 | Returns: 17 | namespace: namespace with parsed variables. 
18 | 19 | """ 20 | parser = argparse.ArgumentParser('Document Understanding Evaluator') 21 | parser.add_argument( 22 | '--out-files', 23 | '-o', 24 | type=argparse.FileType('r', encoding='utf-8'), 25 | required=True, 26 | nargs='+', 27 | help='Out file to evaluate', 28 | ) 29 | parser.add_argument( 30 | '--reference', '-r', type=argparse.FileType('r', encoding='utf-8'), required=True, help='Reference file', 31 | ) 32 | parser.add_argument('--metric', '-m', type=str, default='F1', choices=['F1', 'MEAN-F1', 'ANLS', 'WTQ', 'GROUP-ANLS']) 33 | parser.add_argument( 34 | '--return-score', 35 | default='F1', 36 | choices=['F1', 'mean-F1', 'ANLS', 'mean-Precision', 'mean-Recall', 'WTQ'], 37 | help='Return WR-like mean-F1 score', 38 | ) 39 | parser.add_argument('--line-by-line', action='store_true', default=False, help='Return results example by example') 40 | parser.add_argument( 41 | '--columns', type=str, nargs='+', default=['Precision', 'Recall', 'F1'], help='Columns', 42 | ) 43 | parser.add_argument( 44 | '--print-format', 45 | default='text', 46 | type=str, 47 | choices=['text', 'latex', 'json'], 48 | help='Print feature table in the given format', 49 | ) 50 | parser.add_argument('--properties', nargs='+', type=str, help='Property set to be limited to') 51 | parser.add_argument( 52 | '--ignore-case', '-i', action='store_true', default=False, help='Ignore case when comparing values', 53 | ) 54 | return parser.parse_args() 55 | 56 | 57 | def cli_main(args: argparse.Namespace): 58 | """CLI main. 59 | 60 | Args: 61 | args: cli arguments 62 | """ 63 | reference = [json.loads(line) for line in args.reference] 64 | 65 | evaluators = [] 66 | for out_file in args.out_files: 67 | predictions = [json.loads(line) for line in out_file] 68 | 69 | property_set: Optional[Set[str]] 70 | if args.properties: 71 | property_set = args.properties 72 | else: 73 | property_set = None 74 | 75 | evaluators.append( 76 | DueEvaluator(reference, predictions, property_set, args.ignore_case, out_file.name, args.metric) 77 | ) 78 | 79 | prop_str = property_scores_to_string(evaluators, args.print_format, args.columns) 80 | if args.print_format != 'json': 81 | print(prop_str, file=sys.stderr) 82 | 83 | if args.line_by_line: 84 | for idx, score in enumerate(evaluators[0].line_by_line()): 85 | print(f'{idx}: {score}', file=sys.stderr) 86 | return prop_str 87 | 88 | 89 | def main() -> None: 90 | """Main.""" 91 | args = parse_args() 92 | cli_main(args) 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/__version__.py: -------------------------------------------------------------------------------- 1 | """Version specification.""" 2 | 3 | VERSION = (0, 0, 8) 4 | __version__ = '.'.join(map(str, VERSION)) 5 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/pipeline/eval_utils/due_evaluator/py.typed -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/scorers/__init__.py: -------------------------------------------------------------------------------- 1 | from .anls_scorer import AnlsScorer 2 | from .base_scorer import BaseScorer 3 | from .fscorer import FScorer 4 | from
.mean_fscorer import MeanFScorer 5 | from .wtq_scorer import WtqScorer 6 | from .group_anls import GroupAnlsScorer 7 | from .geval_scorer import GevalScorer 8 | 9 | __all__ = ['AnlsScorer', 'BaseScorer', 'FScorer', 'MeanFScorer', 'WtqScorer', 'GevalScorer', 'GroupAnlsScorer'] 10 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/scorers/accuracy_scorer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | from operator import itemgetter 4 | 5 | from .base_scorer import BaseScorer 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class AccuracyScorer(BaseScorer): 11 | """Accuracy Scorer.""" 12 | 13 | def __init__(self, threshold: float = 0.5): 14 | self.__scores: List[float] = [] 15 | self.threshold = threshold 16 | 17 | @property 18 | def scores(self): 19 | return self.__scores 20 | 21 | def check_denotation(self, out: list, ref: list) -> bool: 22 | return out == ref 23 | 24 | def add(self, out_items: List[dict], ref_items: List[dict]): 25 | """Add more items for computing corpus level scores. 26 | 27 | Args: 28 | out_items: outs from a single document (line) 29 | ref_items: reference of the evaluated document (line) 30 | 31 | """ 32 | out_ann = sorted(out_items['annotations'], key=itemgetter('key')) 33 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key')) 34 | assert [a['key'] for a in out_ann] == [a['key'] for a in ref_ann] 35 | 36 | for out, ref in zip(out_ann, ref_ann): 37 | o_values = [v['value'] for v in out['values']] 38 | r_values = [v['value'] for v in ref['values']] 39 | score = int(self.check_denotation(o_values, r_values)) 40 | self.__scores.append(score) 41 | 42 | def score(self) -> float: 43 | if self.__scores: 44 | return sum(self.__scores) / len(self.__scores) 45 | return 0.0 46 | 47 | @classmethod 48 | def support_feature_scores(cls) -> bool: 49 | return False 50 | 51 | @classmethod 52 | def metric_name(cls) -> str: 53 | return "Accuracy" 54 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/scorers/anls_scorer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | from operator import itemgetter 4 | 5 | import textdistance 6 | 7 | from .base_scorer import BaseScorer 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class AnlsScorer(BaseScorer): 13 | """ANLS Scorer.""" 14 | 15 | def __init__(self, threshold: float = 0.5): 16 | self.__scores: List[float] = [] 17 | self.threshold = threshold 18 | 19 | @property 20 | def scores(self): 21 | return self.__scores 22 | 23 | def add(self, out_items: List[dict], ref_items: List[dict]): 24 | """Add more items for computing corpus level scores. 
25 | 26 | Args: 27 | out_items: outs from a single document (line) 28 | ref_items: reference of the evaluated document (line) 29 | 30 | """ 31 | out_ann = sorted(out_items['annotations'], key=itemgetter('key')) 32 | ref_ann = sorted(ref_items['annotations'], key=itemgetter('key')) 33 | assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann] 34 | 35 | """try: 36 | # assert [a['key'][:100] for a in out_ann] == [a['key'][:100] for a in ref_ann] 37 | out_keys = [a['key'][:100] for a in out_ann] 38 | ref_keys = [a['key'][:100] for a in ref_ann] 39 | # assert out_keys == ref_keys 40 | for i in range(len(out_keys)): 41 | try: 42 | assert out_keys[i] == ref_keys[i] 43 | except AssertionError as e: 44 | print(out_keys[i]) 45 | print(ref_keys[i]) 46 | print('==============') 47 | # exit(0) 48 | 49 | except AssertionError as e: 50 | print('key of pred and gt unmatched:') 51 | # print('pred:', out_keys) 52 | # print('gt:', ref_keys) 53 | exit(0)""" 54 | 55 | for out, ref in zip(out_ann, ref_ann): 56 | assert len(out['values']) == 1 57 | val = out['values'][0]['value'] 58 | possible_vals = ref['values'][0]['value_variants'] 59 | best_score = max([textdistance.levenshtein.normalized_similarity(val, pos) 60 | for pos in possible_vals]) 61 | if 1 - self.threshold >= best_score: 62 | best_score = 0.0 63 | self.__scores.append(best_score) 64 | 65 | def score(self) -> float: 66 | if self.__scores: 67 | return sum(self.__scores) / len(self.__scores) 68 | return 0.0 69 | 70 | @classmethod 71 | def support_feature_scores(cls) -> bool: 72 | return False 73 | 74 | @classmethod 75 | def metric_name(cls) -> str: 76 | return "ANLS" 77 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/scorers/base_scorer.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | 5 | class BaseScorer(abc.ABC): 6 | """Abstract class for scorers.""" 7 | 8 | @abc.abstractmethod 9 | def add(self, out_items: List[dict], ref_items: List[dict]): 10 | pass 11 | 12 | @abc.abstractmethod 13 | def score(self): 14 | pass 15 | 16 | @abc.abstractclassmethod 17 | def support_feature_scores(cls) -> bool: 18 | pass 19 | 20 | @abc.abstractclassmethod 21 | def metric_name(cls) -> str: 22 | pass 23 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/scorers/geval_scorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import tempfile 3 | from collections import defaultdict 4 | import os 5 | 6 | from .fscorer import FScorer 7 | from .base_scorer import BaseScorer 8 | 9 | 10 | GEVAL_BINARY = os.getenv('GEVAL_BINARY', '/data/shared/bin/geval') 11 | GEVAL_METRIC = os.getenv('GEVAL_METRIC', 'MultiLabel-F1:cN') 12 | 13 | 14 | class GevalScorer(BaseScorer): 15 | def __init__(self): 16 | self.__ref = tempfile.NamedTemporaryFile('w+t') 17 | self.__out = tempfile.NamedTemporaryFile('w+t') 18 | self.__ref_data = defaultdict(set) 19 | self.__out_data = defaultdict(set) 20 | 21 | @staticmethod 22 | def add_to_geval_data(data, line): 23 | name = line['name'] 24 | for annotation in line['annotations']: 25 | for idx, val in enumerate(annotation['values'], 1): 26 | for child in val['children']: 27 | new_name = child['key'] + '__' + str(idx) if '__' in child['key'] else child['key'] 28 | if child['values'] and child['values'] != ['']: 29 | new_value = 
'|'.join([v['value'].replace(' ', '_') for v in child['values']]) 30 | data[name].add(f'{new_name}={new_value}') 31 | 32 | def save_geval_files(self): 33 | for name in sorted(self.__ref_data.keys()): 34 | self.__ref.write(' '.join(self.__ref_data[name]) + '\n') 35 | self.__out.write(' '.join(self.__out_data[name]) + '\n') 36 | 37 | def add(self, out_items: List[str], ref_items: List[str]): 38 | self.add_to_geval_data(self.__out_data, out_items) 39 | self.add_to_geval_data(self.__ref_data, ref_items) 40 | 41 | def support_feature_scores(cls) -> bool: 42 | return False 43 | 44 | def metric_name(cls) -> str: 45 | return "GEVAL" 46 | 47 | def run_geval(self): 48 | self.__ref.flush() 49 | self.__out.flush() 50 | try: 51 | return float(os.popen(f'{GEVAL_BINARY} -o {self.__out.name} -e {self.__ref.name} --metric {GEVAL_METRIC}').read()) 52 | except: 53 | return -1 54 | 55 | def score(self) -> float: 56 | self.save_geval_files() 57 | return self.run_geval() 58 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/scorers/mean_fscorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from .fscorer import FScorer 4 | from .base_scorer import BaseScorer 5 | 6 | 7 | class MeanFScorer(BaseScorer): 8 | def __init__(self): 9 | self.__scores: List[float] = [] 10 | 11 | def add(self, out_items: List[str], ref_items: List[str]): 12 | fscorer = FScorer() 13 | fscorer.add(out_items, ref_items) 14 | self.__scores.append(fscorer.f_score()) 15 | 16 | def support_feature_scores(cls) -> bool: 17 | return False 18 | 19 | def metric_name(cls) -> str: 20 | return "MEAN-F1" 21 | 22 | def score(self) -> float: 23 | if self.__scores: 24 | return sum(self.__scores) / len(self.__scores) 25 | return 0.0 26 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/due_evaluator/utils.py: -------------------------------------------------------------------------------- 1 | from .scorers.fscorer import FScorer 2 | from typing import Dict, List, Optional, Sequence, Union 3 | 4 | import pandas as pd 5 | 6 | from .due_evaluator import DueEvaluator 7 | 8 | 9 | def dataframe_to_print(df: pd.DataFrame, print_format: Optional[str] = 'text') -> str: 10 | """Export dataframe to json or plain text. 11 | 12 | Args: 13 | df (pd.DataFrame): data 14 | print_format (str, optional): Print format. Defaults to 'text'. 15 | 16 | Raises: 17 | ValueError: unknown print_format 18 | 19 | Returns: 20 | str: printed version of dataframe 21 | 22 | """ 23 | out: str 24 | if print_format == 'latex': 25 | out = df.reset_index().to_latex(index=False) 26 | elif print_format == 'text': 27 | out = df.reset_index().to_string(index=False) 28 | elif print_format == 'json': 29 | out = df.to_json(orient='index') 30 | else: 31 | raise ValueError() 32 | return out 33 | 34 | 35 | def property_scores_to_string( 36 | dues: List[DueEvaluator], print_format: str = 'text', columns: Sequence[str] = ('Precision', 'Recall', 'F-1'), 37 | ) -> str: 38 | """Print out scores per property. 39 | 40 | Args: 41 | dues: List of DueEvaluators 42 | print_format: output format: text or latex 43 | columns: a list of metrics to print 44 | 45 | Returns: 46 | str: string table with feature scores. 
47 | 48 | """ 49 | data = [] 50 | for property_name in sorted(dues[0].property_scorers.keys()) + ['ALL']: 51 | row_data: Dict[str, Union[str, float]] = {} 52 | row_data['Label'] = property_name 53 | for due in dues: 54 | if len(dues) == 1: 55 | suffix = '' 56 | else: 57 | suffix = f' ({due.path})' 58 | if property_name == 'ALL': 59 | scorer = due.general_scorer 60 | else: 61 | scorer = due.property_scorers[property_name] 62 | 63 | row_data[scorer.metric_name() + suffix] = scorer.score() 64 | if isinstance(scorer, FScorer): 65 | if 'Precision' in columns: 66 | row_data['Precision' + suffix] = scorer.precision() 67 | if 'Recall' in columns: 68 | row_data['Recall' + suffix] = scorer.recall() 69 | data.append(row_data) 70 | 71 | df = pd.DataFrame(data) 72 | df.set_index('Label', drop=True, inplace=True) 73 | 74 | return dataframe_to_print(df, print_format) 75 | -------------------------------------------------------------------------------- /UReader/pipeline/eval_utils/run_evaluation.py: -------------------------------------------------------------------------------- 1 | from .tools import llm_answer_eval, postprocess_llm_vqa, textcaps_textvqa_eval 2 | 3 | if __name__ == '__main__': 4 | 5 | llm_answer_eval(metric_names=['RelaxedAccuracy'], result_path='evaluate_results/test_ChartQA.jsonl', save_each_eval=True) 6 | llm_answer_eval(metric_names=['ExactAccuracy'], result_path='evaluate_results/test_TabFact.jsonl', save_each_eval=True) 7 | llm_answer_eval(metric_names=['BLEU1', 'BLEU2', 'BLEU3', 'BLEU4', 'Meteor', 'RougeL', 'CIDEr'], result_path='evaluate_results/test_VisualMRC.jsonl', save_each_eval=True) 8 | 9 | 10 | postprocess_llm_vqa(dataset_name='DeepFormQA', split='test', 11 | llm_pred_path='./evaluate_results/test_DeepForm.jsonl', 12 | eval_flag=True) 13 | postprocess_llm_vqa(dataset_name='DocVQA', split='test', 14 | llm_pred_path='./evaluate_results/test_DocVQA.jsonl', 15 | eval_flag=True) 16 | postprocess_llm_vqa(dataset_name='InfographicsVQA', split='test', 17 | llm_pred_path='evaluate_results/test_InfographicsVQA.jsonl', 18 | eval_flag=True) 19 | postprocess_llm_vqa(dataset_name='KleisterCharityQA', split='test', 20 | llm_pred_path='evaluate_results/test_KleisterCharity.jsonl', 21 | eval_flag=True) 22 | postprocess_llm_vqa(dataset_name='WikiTableQuestions', split='test', 23 | llm_pred_path='evaluate_results/test_WikiTableQuestions.jsonl', 24 | eval_flag=True) 25 | 26 | # need to submit evaluate_results/***_official_eval.json 27 | textcaps_textvqa_eval(result_path='evaluate_results/test_TextCaps.jsonl', dataset='TextCaps', split='test') 28 | textcaps_textvqa_eval(result_path='evaluate_results/test_TextVQA.jsonl', dataset='TextVQA', split='test') 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /UReader/pipeline/interface.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import requests 4 | from PIL import Image 5 | from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration 6 | from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer 7 | from mplug_owl.processing_mplug_owl import MplugOwlImageProcessor, MplugOwlProcessor 8 | from sconf import Config 9 | from pipeline.data_utils.processors.builder import build_processors 10 | 11 | 12 | def get_model(pretrained_ckpt, use_bf16=False): 13 | """Model Provider with tokenizer and processor. 14 | 15 | Args: 16 | pretrained_ckpt (string): The path to pre-trained checkpoint. 
17 | use_bf16 (bool, optional): Whether to use bfloat16 to load the model. Defaults to False. 18 | 19 | Returns: 20 | model: MplugOwl Model 21 | tokenizer: MplugOwl text tokenizer 22 | processor: MplugOwl processor (including text and image) 23 | """ 24 | model = MplugOwlForConditionalGeneration.from_pretrained( 25 | pretrained_ckpt, 26 | torch_dtype=torch.bfloat16 if use_bf16 else torch.half, 27 | ) 28 | config = Config('configs/sft/release.yaml') 29 | image_processor = build_processors(config['valid_processors'])['sft'] 30 | tokenizer = MplugOwlTokenizer.from_pretrained(pretrained_ckpt) 31 | processor = MplugOwlProcessor(image_processor, tokenizer) 32 | return model, tokenizer, processor 33 | 34 | 35 | def do_generate(prompts, image_list, model, tokenizer, processor, use_bf16=False, **generate_kwargs): 36 | """The interface for generation 37 | 38 | Args: 39 | prompts (List[str]): The prompt text 40 | image_list (List[str]): Paths of images 41 | model (MplugOwlForConditionalGeneration): MplugOwlForConditionalGeneration 42 | tokenizer (MplugOwlTokenizer): MplugOwlTokenizer 43 | processor (MplugOwlProcessor): MplugOwlProcessor 44 | use_bf16 (bool, optional): Whether to use bfloat16. Defaults to False. 45 | 46 | Returns: 47 | sentence (str): Generated sentence. 48 | """ 49 | if image_list: 50 | images = [Image.open(_) for _ in image_list] 51 | else: 52 | images = None 53 | inputs = processor(text=prompts, images=images, return_tensors='pt') 54 | inputs = {k: v.bfloat16() if v.dtype == torch.float else v for k, v in inputs.items()} 55 | inputs = {k: v.to(model.device) for k, v in inputs.items()} 56 | with torch.no_grad(): 57 | res = model.generate(**inputs, **generate_kwargs) 58 | sentence = tokenizer.decode(res.tolist()[0], skip_special_tokens=True) 59 | return sentence 60 | 61 | 62 | if __name__ == '__main__': 63 | pass 64 | -------------------------------------------------------------------------------- /UReader/pipeline/trainer.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | import argparse 3 | from functools import partial 4 | 5 | import torch 6 | 7 | from torch.utils.data import DataLoader, Dataset 8 | from torch.utils.data.distributed import DistributedSampler 9 | 10 | from transformers import Trainer 11 | 12 | from pipeline.utils import batchify 13 | 14 | 15 | class CustomTrainer(Trainer): 16 | 17 | def get_train_dataloader(self) -> DataLoader: 18 | dataset = self.train_dataset 19 | sampler = DistributedSampler(dataset) 20 | return torch.utils.data.DataLoader( 21 | dataset, batch_size=self._train_batch_size, 22 | sampler=sampler, 23 | num_workers=self.args.dataloader_num_workers, 24 | drop_last=True, 25 | pin_memory=False, 26 | collate_fn=batchify) 27 | 28 | 29 | def get_eval_dataloader(self, eval_dataset: Dataset | None = None) -> DataLoader: 30 | dataset = self.eval_dataset 31 | sampler = DistributedSampler(dataset, shuffle=False) 32 | return torch.utils.data.DataLoader( 33 | dataset, batch_size=self._train_batch_size, 34 | sampler=sampler, 35 | num_workers=self.args.dataloader_num_workers, 36 | drop_last=True, 37 | pin_memory=False, 38 | collate_fn=batchify) -------------------------------------------------------------------------------- /UReader/scripts/eval/eval_benchmark.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=`pwd` 2 | python -m torch.distributed.launch --use_env \ 3 | --nproc_per_node ${NPROC_PER_NODE:-8} \ 4 | --nnodes 
${WORLD_SIZE:-1} \ 5 | --node_rank ${RANK:-0} \ 6 | --master_addr ${MASTER_ADDR:-127.0.0.1} \ 7 | --master_port ${MASTER_PORT:-12345} \ 8 | pipeline/evaluation.py \ 9 | --hf_model ./checkpoints/ureader -------------------------------------------------------------------------------- /UReader/scripts/train_it.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # For A100 80G 3 | DIR=`pwd` 4 | export PYTHONPATH=$DIR 5 | DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` 6 | 7 | if [ $MASTER_ADDR ];then 8 | echo $MASTER_ADDR 9 | echo $MASTER_PORT 10 | echo $WORLD_SIZE 11 | echo $RANK 12 | else 13 | MASTER_ADDR=127.0.0.1 14 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15 15 | WORLD_SIZE=1 16 | RANK=0 17 | fi 18 | 19 | DISTRIBUTED_ARGS="--nproc_per_node 1 \ 20 | --nnodes ${WORLD_SIZE} \ 21 | --node_rank ${RANK} \ 22 | --master_addr ${MASTER_ADDR} \ 23 | --master_port ${MASTER_PORT}" 24 | 25 | EXP_NAME=ureader 26 | 27 | max_length=2048 28 | micro_batch_size=4 29 | global_batch_size=256 30 | gradient_accumulation_steps=1 31 | 32 | SAVE_NAME=${EXP_NAME}_${max_length}_${global_batch_size} 33 | 34 | SAVE_PATH="./output/${EXP_NAME}/" 35 | TENSORBOARD_PATH="./tensorboard/sft/${SAVE_NAME}/" 36 | 37 | 38 | 39 | # train_iters = total_data * train_epochs // global_batch_size 40 | train_epochs=10 41 | train_iters=25579 42 | 43 | lr_warmup_iters=50 44 | 45 | eval_iter=50 46 | eval_interval=50 47 | save_interval=500 48 | 49 | mkdir -p ${SAVE_PATH} 50 | mkdir -p ${TENSORBOARD_PATH} 51 | 52 | options=" \ 53 | --pretrained-ckpt checkpoints/mplug-owl-llama-7b \ 54 | --seq-length ${max_length} \ 55 | --micro-batch-size ${micro_batch_size} \ 56 | --global-batch-size ${global_batch_size} \ 57 | --num-training-steps ${train_iters} \ 58 | --train-epochs ${train_epochs} \ 59 | --num-warmup-steps ${lr_warmup_iters} \ 60 | --gradient-accumulation-steps ${gradient_accumulation_steps} \ 61 | --lr 1e-4 \ 62 | --min-lr 1e-6 \ 63 | --eval-iters ${eval_iter} \ 64 | --save-interval ${save_interval} \ 65 | --save-path ${SAVE_PATH} \ 66 | --tensorboard-dir ${TENSORBOARD_PATH} \ 67 | --clip-grad 1.0 \ 68 | --weight-decay 0.0001 \ 69 | --adam-beta1 0.9 \ 70 | --adam-beta2 0.999 \ 71 | --num-workers 16 \ 72 | --use-lora \ 73 | --gradient-checkpointing \ 74 | --bf16" 75 | 76 | multimodal_options=" \ 77 | --mm-config configs/sft/release.yaml 78 | " 79 | 80 | python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log -------------------------------------------------------------------------------- /UReader/scripts/train_it_v100.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # For V100 32G 3 | DIR=`pwd` 4 | export PYTHONPATH=$DIR 5 | DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` 6 | 7 | if [ $MASTER_ADDR ];then 8 | echo $MASTER_ADDR 9 | echo $MASTER_PORT 10 | echo $WORLD_SIZE 11 | echo $RANK 12 | else 13 | MASTER_ADDR=127.0.0.1 14 | MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15 15 | WORLD_SIZE=1 16 | RANK=0 17 | fi 18 | 19 | DISTRIBUTED_ARGS="--nproc_per_node 1 \ 20 | --nnodes ${WORLD_SIZE} \ 21 | --node_rank ${RANK} \ 22 | --master_addr ${MASTER_ADDR} \ 23 | --master_port ${MASTER_PORT}" 24 | 25 | EXP_NAME=ureader 26 | 27 | max_length=2048 28 | micro_batch_size=1 29 | global_batch_size=256 30 | gradient_accumulation_steps=1 31 | 32 | SAVE_NAME=${EXP_NAME}_${max_length}_${global_batch_size} 33 | 34 | SAVE_PATH="./output/${EXP_NAME}/" 35 | 
TENSORBOARD_PATH="./tensorboard/sft/${SAVE_NAME}/" 36 | 37 | 38 | 39 | # train_iters = total_data * train_epochs // global_batch_size 40 | train_epochs=10 41 | train_iters=25579 42 | 43 | lr_warmup_iters=50 44 | 45 | eval_iter=50 46 | eval_interval=50 47 | save_interval=500 48 | 49 | mkdir -p ${SAVE_PATH} 50 | mkdir -p ${TENSORBOARD_PATH} 51 | 52 | options=" \ 53 | --pretrained-ckpt checkpoints/mplug-owl-llama-7b \ 54 | --seq-length ${max_length} \ 55 | --micro-batch-size ${micro_batch_size} \ 56 | --global-batch-size ${global_batch_size} \ 57 | --num-training-steps ${train_iters} \ 58 | --train-epochs ${train_epochs} \ 59 | --num-warmup-steps ${lr_warmup_iters} \ 60 | --gradient-accumulation-steps ${gradient_accumulation_steps} \ 61 | --lr 1e-4 \ 62 | --min-lr 1e-6 \ 63 | --eval-iters ${eval_iter} \ 64 | --save-interval ${save_interval} \ 65 | --save-path ${SAVE_PATH} \ 66 | --tensorboard-dir ${TENSORBOARD_PATH} \ 67 | --clip-grad 1.0 \ 68 | --weight-decay 0.0001 \ 69 | --adam-beta1 0.9 \ 70 | --adam-beta2 0.999 \ 71 | --num-workers 16 \ 72 | --use-lora \ 73 | --gradient-checkpointing \ 74 | --fp16 \ 75 | --deepspeed ds_config.json" 76 | 77 | multimodal_options=" \ 78 | --mm-config configs/sft/release.yaml 79 | " 80 | 81 | python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log -------------------------------------------------------------------------------- /UReader/serve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/UReader/serve/__init__.py -------------------------------------------------------------------------------- /UReader/serve/gradio_css.py: -------------------------------------------------------------------------------- 1 | code_highlight_css = ( 2 | """ 3 | #chatbot .hll { background-color: #ffffcc } 4 | #chatbot .c { color: #408080; font-style: italic } 5 | #chatbot .err { border: 1px solid #FF0000 } 6 | #chatbot .k { color: #008000; font-weight: bold } 7 | #chatbot .o { color: #666666 } 8 | #chatbot .ch { color: #408080; font-style: italic } 9 | #chatbot .cm { color: #408080; font-style: italic } 10 | #chatbot .cp { color: #BC7A00 } 11 | #chatbot .cpf { color: #408080; font-style: italic } 12 | #chatbot .c1 { color: #408080; font-style: italic } 13 | #chatbot .cs { color: #408080; font-style: italic } 14 | #chatbot .gd { color: #A00000 } 15 | #chatbot .ge { font-style: italic } 16 | #chatbot .gr { color: #FF0000 } 17 | #chatbot .gh { color: #000080; font-weight: bold } 18 | #chatbot .gi { color: #00A000 } 19 | #chatbot .go { color: #888888 } 20 | #chatbot .gp { color: #000080; font-weight: bold } 21 | #chatbot .gs { font-weight: bold } 22 | #chatbot .gu { color: #800080; font-weight: bold } 23 | #chatbot .gt { color: #0044DD } 24 | #chatbot .kc { color: #008000; font-weight: bold } 25 | #chatbot .kd { color: #008000; font-weight: bold } 26 | #chatbot .kn { color: #008000; font-weight: bold } 27 | #chatbot .kp { color: #008000 } 28 | #chatbot .kr { color: #008000; font-weight: bold } 29 | #chatbot .kt { color: #B00040 } 30 | #chatbot .m { color: #666666 } 31 | #chatbot .s { color: #BA2121 } 32 | #chatbot .na { color: #7D9029 } 33 | #chatbot .nb { color: #008000 } 34 | #chatbot .nc { color: #0000FF; font-weight: bold } 35 | #chatbot .no { color: #880000 } 36 | #chatbot .nd { color: #AA22FF } 37 | #chatbot .ni { color: #999999; font-weight: bold } 38 | 
#chatbot .ne { color: #D2413A; font-weight: bold } 39 | #chatbot .nf { color: #0000FF } 40 | #chatbot .nl { color: #A0A000 } 41 | #chatbot .nn { color: #0000FF; font-weight: bold } 42 | #chatbot .nt { color: #008000; font-weight: bold } 43 | #chatbot .nv { color: #19177C } 44 | #chatbot .ow { color: #AA22FF; font-weight: bold } 45 | #chatbot .w { color: #bbbbbb } 46 | #chatbot .mb { color: #666666 } 47 | #chatbot .mf { color: #666666 } 48 | #chatbot .mh { color: #666666 } 49 | #chatbot .mi { color: #666666 } 50 | #chatbot .mo { color: #666666 } 51 | #chatbot .sa { color: #BA2121 } 52 | #chatbot .sb { color: #BA2121 } 53 | #chatbot .sc { color: #BA2121 } 54 | #chatbot .dl { color: #BA2121 } 55 | #chatbot .sd { color: #BA2121; font-style: italic } 56 | #chatbot .s2 { color: #BA2121 } 57 | #chatbot .se { color: #BB6622; font-weight: bold } 58 | #chatbot .sh { color: #BA2121 } 59 | #chatbot .si { color: #BB6688; font-weight: bold } 60 | #chatbot .sx { color: #008000 } 61 | #chatbot .sr { color: #BB6688 } 62 | #chatbot .s1 { color: #BA2121 } 63 | #chatbot .ss { color: #19177C } 64 | #chatbot .bp { color: #008000 } 65 | #chatbot .fm { color: #0000FF } 66 | #chatbot .vc { color: #19177C } 67 | #chatbot .vg { color: #19177C } 68 | #chatbot .vi { color: #19177C } 69 | #chatbot .vm { color: #19177C } 70 | #chatbot .il { color: #666666 } 71 | """) 72 | #.highlight { background: #f8f8f8; } 73 | 74 | -------------------------------------------------------------------------------- /UReader/serve/model_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import re 4 | import torch 5 | import transformers 6 | import traceback 7 | 8 | from queue import Queue 9 | from threading import Thread 10 | 11 | 12 | def post_process_output(text): 13 | text = text.strip() 14 | pattern = re.compile( 15 | r"<unk>|<s>|</s>|<pad>|\[PAD\]|<\|endoftext\|>|\[UNK\]|\[CLS\]|\[MASK\]|<\|startofpiece\|>|<\|endofpiece\|>|\[gMASK\]|\[sMASK\]" 16 | ) 17 | text = pattern.sub("", text.strip()).strip() 18 | return text 19 | 20 | 21 | def post_process_code(code): 22 | sep = "\n```" 23 | if sep in code: 24 | blocks = code.split(sep) 25 | if len(blocks) % 2 == 1: 26 | for i in range(1, len(blocks), 2): 27 | blocks[i] = blocks[i].replace("\\_", "_") 28 | code = sep.join(blocks) 29 | return code 30 | 31 | 32 | class Stream(transformers.StoppingCriteria): 33 | def __init__(self, callback_func=None): 34 | self.callback_func = callback_func 35 | 36 | def __call__(self, input_ids, scores) -> bool: 37 | if self.callback_func is not None: 38 | self.callback_func(input_ids[0]) 39 | return False 40 | 41 | 42 | class Iteratorize: 43 | 44 | """ 45 | Transforms a function that takes a callback 46 | into a lazy iterator (generator). 
47 | """ 48 | 49 | def __init__(self, func, kwargs={}, callback=None): 50 | self.mfunc = func 51 | self.c_callback = callback 52 | self.q = Queue() 53 | self.sentinel = object() 54 | self.kwargs = kwargs 55 | self.stop_now = False 56 | 57 | def _callback(val): 58 | if self.stop_now: 59 | raise ValueError 60 | self.q.put(val) 61 | 62 | def gentask(): 63 | try: 64 | ret = self.mfunc(callback=_callback, **self.kwargs) 65 | except ValueError: 66 | pass 67 | except: 68 | traceback.print_exc() 69 | pass 70 | 71 | self.q.put(self.sentinel) 72 | if self.c_callback: 73 | self.c_callback(ret) 74 | 75 | self.thread = Thread(target=gentask) 76 | self.thread.start() 77 | 78 | def __iter__(self): 79 | return self 80 | 81 | def __next__(self): 82 | obj = self.q.get(True, None) 83 | if obj is self.sentinel: 84 | raise StopIteration 85 | else: 86 | return obj 87 | 88 | def __enter__(self): 89 | return self 90 | 91 | def __exit__(self, exc_type, exc_val, exc_tb): 92 | self.stop_now = True -------------------------------------------------------------------------------- /assets/docowl1.5_chat_case.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/docowl1.5_chat_case.png -------------------------------------------------------------------------------- /assets/docowl2_github_case.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/docowl2_github_case.jpg -------------------------------------------------------------------------------- /assets/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/huggingface.png -------------------------------------------------------------------------------- /assets/mPLUG_new1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/mPLUG_new1.png -------------------------------------------------------------------------------- /assets/modelscope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-PLUG/mPLUG-DocOwl/f91a76859babfdebe7420db6133b66f06f65efc2/assets/modelscope.png --------------------------------------------------------------------------------
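Closing note on UReader/serve/model_utils.py above: Stream is a StoppingCriteria that hands the running token ids to a callback after every decoding step, and Iteratorize turns that callback-driven call into a generator backed by a queue and a worker thread. The sketch below shows one way they can be combined for streaming generation; the stream_generate wrapper and its import paths are illustrative assumptions, not code from this repository.

import torch
from transformers import StoppingCriteriaList

from serve.model_utils import Iteratorize, Stream, post_process_output


def stream_generate(model, tokenizer, inputs, **generate_kwargs):
    # Illustrative wrapper: yields progressively longer decoded strings.
    def generate_with_callback(callback=None, **kwargs):
        # Stream pushes input_ids[0] to `callback` at every generation step.
        kwargs['stopping_criteria'] = StoppingCriteriaList([Stream(callback_func=callback)])
        with torch.no_grad():
            model.generate(**inputs, **kwargs)

    with Iteratorize(generate_with_callback, generate_kwargs) as generator:
        for output_ids in generator:
            yield post_process_output(tokenizer.decode(output_ids, skip_special_tokens=True))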