├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── config.yml ├── pull_request_template.md └── workflows │ ├── cla.yml │ ├── cli.yml │ ├── huigui.yml │ ├── python-package.yml │ └── rerun.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── LICENSE.md ├── MinerU_CLA.md ├── README.md ├── README_zh-CN.md ├── SECURITY.md ├── demo ├── batch_demo.py ├── demo.py └── pdfs │ ├── demo1.pdf │ ├── demo2.pdf │ ├── demo3.pdf │ └── small_ocr.pdf ├── docker ├── ascend_npu │ └── Dockerfile ├── china │ └── Dockerfile └── global │ └── Dockerfile ├── docs ├── FAQ_en_us.md ├── FAQ_zh_cn.md ├── README_Ascend_NPU_Acceleration_zh_CN.md ├── README_Ubuntu_CUDA_Acceleration_en_US.md ├── README_Ubuntu_CUDA_Acceleration_zh_CN.md ├── README_Windows_CUDA_Acceleration_en_US.md ├── README_Windows_CUDA_Acceleration_zh_CN.md ├── chemical_knowledge_introduction │ ├── introduction.pdf │ └── introduction.xmind ├── how_to_download_models_en.md ├── how_to_download_models_zh_cn.md ├── images │ ├── MinerU-logo.png │ ├── datalab_logo.png │ ├── flowchart_en.png │ ├── flowchart_zh_cn.png │ ├── layout_example.png │ ├── poly.png │ ├── project_panorama_en.png │ ├── project_panorama_zh_cn.png │ ├── spans_example.png │ └── web_demo_1.png ├── output_file_en_us.md └── output_file_zh_cn.md ├── magic-pdf.template.json ├── magic_pdf ├── __init__.py ├── config │ ├── __init__.py │ ├── constants.py │ ├── drop_reason.py │ ├── drop_tag.py │ ├── enums.py │ ├── exceptions.py │ ├── make_content_config.py │ ├── model_block_type.py │ └── ocr_content_type.py ├── data │ ├── __init__.py │ ├── batch_build_dataset.py │ ├── data_reader_writer │ │ ├── __init__.py │ │ ├── base.py │ │ ├── filebase.py │ │ ├── multi_bucket_s3.py │ │ └── s3.py │ ├── dataset.py │ ├── io │ │ ├── __init__.py │ │ ├── base.py │ │ ├── http.py │ │ └── s3.py │ ├── read_api.py │ ├── schemas.py │ └── utils.py ├── dict2md │ ├── __init__.py │ └── ocr_mkcontent.py ├── filter │ ├── __init__.py │ ├── pdf_classify_by_type.py │ 
└── pdf_meta_scan.py ├── integrations │ ├── __init__.py │ └── rag │ │ ├── __init__.py │ │ ├── api.py │ │ ├── type.py │ │ └── utils.py ├── libs │ ├── __init__.py │ ├── boxbase.py │ ├── clean_memory.py │ ├── commons.py │ ├── config_reader.py │ ├── convert_utils.py │ ├── coordinate_transform.py │ ├── draw_bbox.py │ ├── hash_utils.py │ ├── json_compressor.py │ ├── language.py │ ├── local_math.py │ ├── markdown_utils.py │ ├── path_utils.py │ ├── pdf_check.py │ ├── pdf_image_tools.py │ ├── performance_stats.py │ ├── safe_filename.py │ └── version.py ├── model │ ├── __init__.py │ ├── batch_analyze.py │ ├── doc_analyze_by_custom_model.py │ ├── magic_model.py │ ├── model_list.py │ ├── pdf_extract_kit.py │ ├── pp_structure_v2.py │ └── sub_modules │ │ ├── __init__.py │ │ ├── language_detection │ │ ├── __init__.py │ │ ├── utils.py │ │ └── yolov11 │ │ │ ├── YOLOv11.py │ │ │ └── __init__.py │ │ ├── layout │ │ ├── __init__.py │ │ ├── doclayout_yolo │ │ │ ├── DocLayoutYOLO.py │ │ │ └── __init__.py │ │ └── layoutlmv3 │ │ │ ├── __init__.py │ │ │ ├── backbone.py │ │ │ ├── beit.py │ │ │ ├── deit.py │ │ │ ├── layoutlmft │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ │ ├── __init__.py │ │ │ │ ├── cord.py │ │ │ │ ├── data_collator.py │ │ │ │ ├── funsd.py │ │ │ │ ├── image_utils.py │ │ │ │ └── xfund.py │ │ │ └── models │ │ │ │ ├── __init__.py │ │ │ │ └── layoutlmv3 │ │ │ │ ├── __init__.py │ │ │ │ ├── configuration_layoutlmv3.py │ │ │ │ ├── modeling_layoutlmv3.py │ │ │ │ ├── tokenization_layoutlmv3.py │ │ │ │ └── tokenization_layoutlmv3_fast.py │ │ │ ├── model_init.py │ │ │ ├── rcnn_vl.py │ │ │ └── visualizer.py │ │ ├── mfd │ │ ├── __init__.py │ │ └── yolov8 │ │ │ ├── YOLOv8.py │ │ │ └── __init__.py │ │ ├── mfr │ │ ├── __init__.py │ │ └── unimernet │ │ │ ├── Unimernet.py │ │ │ ├── __init__.py │ │ │ └── unimernet_hf │ │ │ ├── __init__.py │ │ │ ├── modeling_unimernet.py │ │ │ ├── unimer_mbart │ │ │ ├── __init__.py │ │ │ ├── configuration_unimer_mbart.py │ │ │ ├── modeling_unimer_mbart.py │ │ │ 
└── tokenization_unimer_mbart.py │ │ │ └── unimer_swin │ │ │ ├── __init__.py │ │ │ ├── configuration_unimer_swin.py │ │ │ ├── image_processing_unimer_swin.py │ │ │ └── modeling_unimer_swin.py │ │ ├── model_init.py │ │ ├── model_utils.py │ │ ├── ocr │ │ ├── __init__.py │ │ └── paddleocr2pytorch │ │ │ ├── __init__.py │ │ │ ├── ocr_utils.py │ │ │ ├── pytorch_paddle.py │ │ │ ├── pytorchocr │ │ │ ├── __init__.py │ │ │ ├── base_ocr_v20.py │ │ │ ├── data │ │ │ │ ├── __init__.py │ │ │ │ └── imaug │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── operators.py │ │ │ ├── modeling │ │ │ │ ├── __init__.py │ │ │ │ ├── architectures │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── base_model.py │ │ │ │ ├── backbones │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── det_mobilenet_v3.py │ │ │ │ │ ├── rec_hgnet.py │ │ │ │ │ ├── rec_lcnetv3.py │ │ │ │ │ ├── rec_mobilenet_v3.py │ │ │ │ │ ├── rec_mv1_enhance.py │ │ │ │ │ ├── rec_pphgnetv2.py │ │ │ │ │ └── rec_svtrnet.py │ │ │ │ ├── common.py │ │ │ │ ├── heads │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cls_head.py │ │ │ │ │ ├── det_db_head.py │ │ │ │ │ ├── rec_ctc_head.py │ │ │ │ │ └── rec_multi_head.py │ │ │ │ └── necks │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── db_fpn.py │ │ │ │ │ ├── intracl.py │ │ │ │ │ └── rnn.py │ │ │ ├── postprocess │ │ │ │ ├── __init__.py │ │ │ │ ├── cls_postprocess.py │ │ │ │ ├── db_postprocess.py │ │ │ │ └── rec_postprocess.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── resources │ │ │ │ ├── arch_config.yaml │ │ │ │ ├── dict │ │ │ │ ├── arabic_dict.txt │ │ │ │ ├── chinese_cht_dict.txt │ │ │ │ ├── cyrillic_dict.txt │ │ │ │ ├── devanagari_dict.txt │ │ │ │ ├── en_dict.txt │ │ │ │ ├── japan_dict.txt │ │ │ │ ├── ka_dict.txt │ │ │ │ ├── korean_dict.txt │ │ │ │ ├── latin_dict.txt │ │ │ │ ├── ppocr_keys_v1.txt │ │ │ │ ├── ppocrv4_doc_dict.txt │ │ │ │ ├── ppocrv5_dict.txt │ │ │ │ ├── ta_dict.txt │ │ │ │ └── te_dict.txt │ │ │ │ └── models_config.yml │ │ │ └── tools │ │ │ ├── __init__.py │ │ │ └── infer │ │ │ ├── __init__.py │ │ │ ├── 
predict_cls.py │ │ │ ├── predict_det.py │ │ │ ├── predict_rec.py │ │ │ ├── predict_system.py │ │ │ └── pytorchocr_utility.py │ │ ├── reading_oreder │ │ ├── __init__.py │ │ └── layoutreader │ │ │ ├── __init__.py │ │ │ ├── helpers.py │ │ │ └── xycut.py │ │ └── table │ │ ├── __init__.py │ │ ├── rapidtable │ │ ├── __init__.py │ │ └── rapid_table.py │ │ └── table_utils.py ├── operators │ ├── __init__.py │ ├── models.py │ └── pipes.py ├── pdf_parse_union_core_v2.py ├── post_proc │ ├── __init__.py │ ├── llm_aided.py │ └── para_split_v3.py ├── pre_proc │ ├── __init__.py │ ├── construct_page_dict.py │ ├── cut_image.py │ ├── ocr_detect_all_bboxes.py │ ├── ocr_dict_merge.py │ ├── ocr_span_list_modify.py │ └── remove_bbox_overlap.py ├── resources │ ├── fasttext-langdetect │ │ └── lid.176.ftz │ ├── model_config │ │ └── model_configs.yaml │ ├── slanet_plus │ │ └── slanet-plus.onnx │ └── yolov11-langdetect │ │ └── yolo_v11_ft.pt ├── spark │ ├── __init__.py │ └── spark_api.py ├── tools │ ├── __init__.py │ ├── cli.py │ ├── cli_dev.py │ └── common.py └── utils │ ├── __init__.py │ ├── annotations.py │ └── office_to_pdf.py ├── next_docs ├── en │ ├── .readthedocs.yaml │ ├── Makefile │ ├── _static │ │ └── image │ │ │ ├── MinerU-logo-hq.png │ │ │ ├── MinerU-logo.png │ │ │ ├── ReadTheDocs.svg │ │ │ ├── datalab_logo.png │ │ │ ├── flowchart_en.png │ │ │ ├── flowchart_zh_cn.png │ │ │ ├── inference_result.png │ │ │ ├── layout_example.png │ │ │ ├── logo.png │ │ │ ├── pipeline.drawio.svg │ │ │ ├── poly.png │ │ │ ├── project_panorama_en.png │ │ │ ├── project_panorama_zh_cn.png │ │ │ ├── spans_example.png │ │ │ └── web_demo_1.png │ ├── additional_notes │ │ ├── faq.rst │ │ ├── glossary.rst │ │ └── known_issues.rst │ ├── api.rst │ ├── api │ │ ├── data_reader_writer.rst │ │ ├── dataset.rst │ │ ├── io.rst │ │ ├── model_operators.rst │ │ ├── pipe_operators.rst │ │ ├── read_api.rst │ │ └── schemas.rst │ ├── conf.py │ ├── index.rst │ ├── make.bat │ ├── user_guide.rst │ └── user_guide │ │ ├── data.rst │ 
│ ├── data │ │ ├── data_reader_writer.rst │ │ ├── dataset.rst │ │ ├── io.rst │ │ └── read_api.rst │ │ ├── inference_result.rst │ │ ├── install.rst │ │ ├── install │ │ ├── boost_with_cuda.rst │ │ ├── config.rst │ │ ├── download_model_weight_files.rst │ │ └── install.rst │ │ ├── pipe_result.rst │ │ ├── quick_start.rst │ │ ├── quick_start │ │ ├── convert_image.rst │ │ ├── convert_ms_office.rst │ │ └── convert_pdf.rst │ │ ├── tutorial.rst │ │ ├── tutorial │ │ ├── output_file_description.rst │ │ └── pipeline.rst │ │ ├── usage.rst │ │ └── usage │ │ ├── api.rst │ │ ├── command_line.rst │ │ └── docker.rst ├── requirements.txt └── zh_cn │ ├── .readthedocs.yaml │ ├── Makefile │ ├── _static │ └── image │ │ ├── MinerU-logo-hq.png │ │ ├── MinerU-logo.png │ │ ├── ReadTheDocs.svg │ │ ├── datalab_logo.png │ │ ├── flowchart_en.png │ │ ├── flowchart_zh_cn.png │ │ ├── inference_result.png │ │ ├── layout_example.png │ │ ├── logo.png │ │ ├── pipeline.drawio.svg │ │ ├── poly.png │ │ ├── project_panorama_en.png │ │ ├── project_panorama_zh_cn.png │ │ ├── spans_example.png │ │ └── web_demo_1.png │ ├── additional_notes │ ├── faq.rst │ ├── glossary.rst │ └── known_issues.rst │ ├── conf.py │ ├── index.rst │ ├── make.bat │ ├── user_guide.rst │ └── user_guide │ ├── data.rst │ ├── data │ ├── data_reader_writer.rst │ ├── dataset.rst │ ├── io.rst │ └── read_api.rst │ ├── install.rst │ ├── install │ ├── boost_with_cuda.rst │ ├── download_model_weight_files.rst │ └── install.rst │ ├── quick_start.rst │ ├── quick_start │ ├── command_line.rst │ └── to_markdown.rst │ ├── tutorial.rst │ └── tutorial │ ├── output_file_description.rst │ └── pipeline.rst ├── projects ├── README.md ├── README_zh-CN.md ├── gradio_app │ ├── README.md │ ├── README_zh-CN.md │ ├── app.py │ ├── examples │ │ ├── 2list_1table.pdf │ │ ├── 3list_1table.pdf │ │ ├── academic_paper_formula.pdf │ │ ├── academic_paper_img_formula.pdf │ │ ├── academic_paper_list.pdf │ │ ├── complex_layout.pdf │ │ ├── complex_layout_para_split_list.pdf │ │ 
├── garbled_formula.pdf │ │ ├── magazine_complex_layout_images_list.pdf │ │ └── scanned.pdf │ ├── header.html │ └── requirements.txt ├── llama_index_rag │ ├── README.md │ ├── README_zh-CN.md │ ├── data_ingestion.py │ ├── docker-compose.yml │ ├── example │ │ └── data │ │ │ └── declaration_of_the_rights_of_man_1789.pdf │ ├── query.py │ └── rag_data_api.png ├── multi_gpu │ ├── README.md │ ├── client.py │ └── server.py ├── web │ ├── .gitignore │ ├── README.md │ ├── README_zh-CN.md │ ├── eslint.config.js │ ├── index.html │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.js │ ├── public │ │ ├── iconfont.js │ │ ├── logo.svg │ │ └── pdfjs-dist │ │ │ ├── build │ │ │ ├── pdf.mjs │ │ │ ├── pdf.worker.mjs │ │ │ └── test.js │ │ │ └── web │ │ │ ├── cmaps │ │ │ ├── 78-EUC-H.bcmap │ │ │ ├── 78-EUC-V.bcmap │ │ │ ├── 78-H.bcmap │ │ │ ├── 78-RKSJ-H.bcmap │ │ │ ├── 78-RKSJ-V.bcmap │ │ │ ├── 78-V.bcmap │ │ │ ├── 78ms-RKSJ-H.bcmap │ │ │ ├── 78ms-RKSJ-V.bcmap │ │ │ ├── 83pv-RKSJ-H.bcmap │ │ │ ├── 90ms-RKSJ-H.bcmap │ │ │ ├── 90ms-RKSJ-V.bcmap │ │ │ ├── 90msp-RKSJ-H.bcmap │ │ │ ├── 90msp-RKSJ-V.bcmap │ │ │ ├── 90pv-RKSJ-H.bcmap │ │ │ ├── 90pv-RKSJ-V.bcmap │ │ │ ├── Add-H.bcmap │ │ │ ├── Add-RKSJ-H.bcmap │ │ │ ├── Add-RKSJ-V.bcmap │ │ │ ├── Add-V.bcmap │ │ │ ├── Adobe-CNS1-0.bcmap │ │ │ ├── Adobe-CNS1-1.bcmap │ │ │ ├── Adobe-CNS1-2.bcmap │ │ │ ├── Adobe-CNS1-3.bcmap │ │ │ ├── Adobe-CNS1-4.bcmap │ │ │ ├── Adobe-CNS1-5.bcmap │ │ │ ├── Adobe-CNS1-6.bcmap │ │ │ ├── Adobe-CNS1-UCS2.bcmap │ │ │ ├── Adobe-GB1-0.bcmap │ │ │ ├── Adobe-GB1-1.bcmap │ │ │ ├── Adobe-GB1-2.bcmap │ │ │ ├── Adobe-GB1-3.bcmap │ │ │ ├── Adobe-GB1-4.bcmap │ │ │ ├── Adobe-GB1-5.bcmap │ │ │ ├── Adobe-GB1-UCS2.bcmap │ │ │ ├── Adobe-Japan1-0.bcmap │ │ │ ├── Adobe-Japan1-1.bcmap │ │ │ ├── Adobe-Japan1-2.bcmap │ │ │ ├── Adobe-Japan1-3.bcmap │ │ │ ├── Adobe-Japan1-4.bcmap │ │ │ ├── Adobe-Japan1-5.bcmap │ │ │ ├── Adobe-Japan1-6.bcmap │ │ │ ├── Adobe-Japan1-UCS2.bcmap │ │ │ ├── Adobe-Korea1-0.bcmap │ 
│ │ ├── Adobe-Korea1-1.bcmap │ │ │ ├── Adobe-Korea1-2.bcmap │ │ │ ├── Adobe-Korea1-UCS2.bcmap │ │ │ ├── B5-H.bcmap │ │ │ ├── B5-V.bcmap │ │ │ ├── B5pc-H.bcmap │ │ │ ├── B5pc-V.bcmap │ │ │ ├── CNS-EUC-H.bcmap │ │ │ ├── CNS-EUC-V.bcmap │ │ │ ├── CNS1-H.bcmap │ │ │ ├── CNS1-V.bcmap │ │ │ ├── CNS2-H.bcmap │ │ │ ├── CNS2-V.bcmap │ │ │ ├── ETHK-B5-H.bcmap │ │ │ ├── ETHK-B5-V.bcmap │ │ │ ├── ETen-B5-H.bcmap │ │ │ ├── ETen-B5-V.bcmap │ │ │ ├── ETenms-B5-H.bcmap │ │ │ ├── ETenms-B5-V.bcmap │ │ │ ├── EUC-H.bcmap │ │ │ ├── EUC-V.bcmap │ │ │ ├── Ext-H.bcmap │ │ │ ├── Ext-RKSJ-H.bcmap │ │ │ ├── Ext-RKSJ-V.bcmap │ │ │ ├── Ext-V.bcmap │ │ │ ├── GB-EUC-H.bcmap │ │ │ ├── GB-EUC-V.bcmap │ │ │ ├── GB-H.bcmap │ │ │ ├── GB-V.bcmap │ │ │ ├── GBK-EUC-H.bcmap │ │ │ ├── GBK-EUC-V.bcmap │ │ │ ├── GBK2K-H.bcmap │ │ │ ├── GBK2K-V.bcmap │ │ │ ├── GBKp-EUC-H.bcmap │ │ │ ├── GBKp-EUC-V.bcmap │ │ │ ├── GBT-EUC-H.bcmap │ │ │ ├── GBT-EUC-V.bcmap │ │ │ ├── GBT-H.bcmap │ │ │ ├── GBT-V.bcmap │ │ │ ├── GBTpc-EUC-H.bcmap │ │ │ ├── GBTpc-EUC-V.bcmap │ │ │ ├── GBpc-EUC-H.bcmap │ │ │ ├── GBpc-EUC-V.bcmap │ │ │ ├── H.bcmap │ │ │ ├── HKdla-B5-H.bcmap │ │ │ ├── HKdla-B5-V.bcmap │ │ │ ├── HKdlb-B5-H.bcmap │ │ │ ├── HKdlb-B5-V.bcmap │ │ │ ├── HKgccs-B5-H.bcmap │ │ │ ├── HKgccs-B5-V.bcmap │ │ │ ├── HKm314-B5-H.bcmap │ │ │ ├── HKm314-B5-V.bcmap │ │ │ ├── HKm471-B5-H.bcmap │ │ │ ├── HKm471-B5-V.bcmap │ │ │ ├── HKscs-B5-H.bcmap │ │ │ ├── HKscs-B5-V.bcmap │ │ │ ├── Hankaku.bcmap │ │ │ ├── Hiragana.bcmap │ │ │ ├── KSC-EUC-H.bcmap │ │ │ ├── KSC-EUC-V.bcmap │ │ │ ├── KSC-H.bcmap │ │ │ ├── KSC-Johab-H.bcmap │ │ │ ├── KSC-Johab-V.bcmap │ │ │ ├── KSC-V.bcmap │ │ │ ├── KSCms-UHC-H.bcmap │ │ │ ├── KSCms-UHC-HW-H.bcmap │ │ │ ├── KSCms-UHC-HW-V.bcmap │ │ │ ├── KSCms-UHC-V.bcmap │ │ │ ├── KSCpc-EUC-H.bcmap │ │ │ ├── KSCpc-EUC-V.bcmap │ │ │ ├── Katakana.bcmap │ │ │ ├── LICENSE │ │ │ ├── NWP-H.bcmap │ │ │ ├── NWP-V.bcmap │ │ │ ├── RKSJ-H.bcmap │ │ │ ├── RKSJ-V.bcmap │ │ │ ├── Roman.bcmap │ │ │ ├── UniCNS-UCS2-H.bcmap │ │ │ ├── 
UniCNS-UCS2-V.bcmap │ │ │ ├── UniCNS-UTF16-H.bcmap │ │ │ ├── UniCNS-UTF16-V.bcmap │ │ │ ├── UniCNS-UTF32-H.bcmap │ │ │ ├── UniCNS-UTF32-V.bcmap │ │ │ ├── UniCNS-UTF8-H.bcmap │ │ │ ├── UniCNS-UTF8-V.bcmap │ │ │ ├── UniGB-UCS2-H.bcmap │ │ │ ├── UniGB-UCS2-V.bcmap │ │ │ ├── UniGB-UTF16-H.bcmap │ │ │ ├── UniGB-UTF16-V.bcmap │ │ │ ├── UniGB-UTF32-H.bcmap │ │ │ ├── UniGB-UTF32-V.bcmap │ │ │ ├── UniGB-UTF8-H.bcmap │ │ │ ├── UniGB-UTF8-V.bcmap │ │ │ ├── UniJIS-UCS2-H.bcmap │ │ │ ├── UniJIS-UCS2-HW-H.bcmap │ │ │ ├── UniJIS-UCS2-HW-V.bcmap │ │ │ ├── UniJIS-UCS2-V.bcmap │ │ │ ├── UniJIS-UTF16-H.bcmap │ │ │ ├── UniJIS-UTF16-V.bcmap │ │ │ ├── UniJIS-UTF32-H.bcmap │ │ │ ├── UniJIS-UTF32-V.bcmap │ │ │ ├── UniJIS-UTF8-H.bcmap │ │ │ ├── UniJIS-UTF8-V.bcmap │ │ │ ├── UniJIS2004-UTF16-H.bcmap │ │ │ ├── UniJIS2004-UTF16-V.bcmap │ │ │ ├── UniJIS2004-UTF32-H.bcmap │ │ │ ├── UniJIS2004-UTF32-V.bcmap │ │ │ ├── UniJIS2004-UTF8-H.bcmap │ │ │ ├── UniJIS2004-UTF8-V.bcmap │ │ │ ├── UniJISPro-UCS2-HW-V.bcmap │ │ │ ├── UniJISPro-UCS2-V.bcmap │ │ │ ├── UniJISPro-UTF8-V.bcmap │ │ │ ├── UniJISX0213-UTF32-H.bcmap │ │ │ ├── UniJISX0213-UTF32-V.bcmap │ │ │ ├── UniJISX02132004-UTF32-H.bcmap │ │ │ ├── UniJISX02132004-UTF32-V.bcmap │ │ │ ├── UniKS-UCS2-H.bcmap │ │ │ ├── UniKS-UCS2-V.bcmap │ │ │ ├── UniKS-UTF16-H.bcmap │ │ │ ├── UniKS-UTF16-V.bcmap │ │ │ ├── UniKS-UTF32-H.bcmap │ │ │ ├── UniKS-UTF32-V.bcmap │ │ │ ├── UniKS-UTF8-H.bcmap │ │ │ ├── UniKS-UTF8-V.bcmap │ │ │ ├── V.bcmap │ │ │ └── WP-Symbol.bcmap │ │ │ ├── custom.css │ │ │ ├── custom.js │ │ │ ├── images │ │ │ ├── altText_add.svg │ │ │ ├── altText_done.svg │ │ │ ├── annotation-check.svg │ │ │ ├── annotation-comment.svg │ │ │ ├── annotation-help.svg │ │ │ ├── annotation-insert.svg │ │ │ ├── annotation-key.svg │ │ │ ├── annotation-newparagraph.svg │ │ │ ├── annotation-noicon.svg │ │ │ ├── annotation-note.svg │ │ │ ├── annotation-paperclip.svg │ │ │ ├── annotation-paragraph.svg │ │ │ ├── annotation-pushpin.svg │ │ │ ├── 
cursor-editorFreeHighlight.svg │ │ │ ├── cursor-editorFreeText.svg │ │ │ ├── cursor-editorInk.svg │ │ │ ├── cursor-editorTextHighlight.svg │ │ │ ├── editor-toolbar-delete.svg │ │ │ ├── findbarButton-next.svg │ │ │ ├── findbarButton-previous.svg │ │ │ ├── gv-toolbarButton-download.svg │ │ │ ├── layer-button-show.svg │ │ │ ├── layer-button.svg │ │ │ ├── loading-icon.gif │ │ │ ├── loading.svg │ │ │ ├── secondaryToolbarButton-documentProperties.svg │ │ │ ├── secondaryToolbarButton-firstPage.svg │ │ │ ├── secondaryToolbarButton-handTool.svg │ │ │ ├── secondaryToolbarButton-lastPage.svg │ │ │ ├── secondaryToolbarButton-rotateCcw.svg │ │ │ ├── secondaryToolbarButton-rotateCw.svg │ │ │ ├── secondaryToolbarButton-scrollHorizontal.svg │ │ │ ├── secondaryToolbarButton-scrollPage.svg │ │ │ ├── secondaryToolbarButton-scrollVertical.svg │ │ │ ├── secondaryToolbarButton-scrollWrapped.svg │ │ │ ├── secondaryToolbarButton-selectTool.svg │ │ │ ├── secondaryToolbarButton-spreadEven.svg │ │ │ ├── secondaryToolbarButton-spreadNone.svg │ │ │ ├── secondaryToolbarButton-spreadOdd.svg │ │ │ ├── toolbarButton-bookmark.svg │ │ │ ├── toolbarButton-download.svg │ │ │ ├── toolbarButton-editorFreeText.svg │ │ │ ├── toolbarButton-editorHighlight.svg │ │ │ ├── toolbarButton-editorInk.svg │ │ │ ├── toolbarButton-editorStamp.svg │ │ │ ├── toolbarButton-menuArrow.svg │ │ │ ├── toolbarButton-openFile.svg │ │ │ ├── toolbarButton-pageDown.svg │ │ │ ├── toolbarButton-pageUp.svg │ │ │ ├── toolbarButton-presentationMode.svg │ │ │ ├── toolbarButton-print.svg │ │ │ ├── toolbarButton-search.svg │ │ │ ├── toolbarButton-secondaryToolbarToggle.svg │ │ │ ├── toolbarButton-sidebarToggle-open.svg │ │ │ ├── toolbarButton-sidebarToggle.svg │ │ │ ├── toolbarButton-viewAttachments.svg │ │ │ ├── toolbarButton-viewLayers.svg │ │ │ ├── toolbarButton-viewOutline.svg │ │ │ ├── toolbarButton-viewThumbnail.svg │ │ │ ├── toolbarButton-zoomIn.svg │ │ │ ├── toolbarButton-zoomOut.svg │ │ │ ├── treeitem-collapsed.svg │ │ │ └── 
treeitem-expanded.svg │ │ │ ├── layer.js │ │ │ ├── standard_fonts │ │ │ ├── FoxitDingbats.pfb │ │ │ ├── FoxitFixed.pfb │ │ │ ├── FoxitFixedBold.pfb │ │ │ ├── FoxitFixedBoldItalic.pfb │ │ │ ├── FoxitFixedItalic.pfb │ │ │ ├── FoxitSerif.pfb │ │ │ ├── FoxitSerifBold.pfb │ │ │ ├── FoxitSerifBoldItalic.pfb │ │ │ ├── FoxitSerifItalic.pfb │ │ │ ├── FoxitSymbol.pfb │ │ │ ├── LICENSE_FOXIT │ │ │ ├── LICENSE_LIBERATION │ │ │ ├── LiberationSans-Bold.ttf │ │ │ ├── LiberationSans-BoldItalic.ttf │ │ │ ├── LiberationSans-Italic.ttf │ │ │ └── LiberationSans-Regular.ttf │ │ │ ├── viewer.css │ │ │ ├── viewer.html │ │ │ └── viewer.mjs │ ├── src │ │ ├── App.css │ │ ├── App.tsx │ │ ├── api │ │ │ ├── extract.ts │ │ │ ├── http.ts │ │ │ └── oss.ts │ │ ├── assets │ │ │ ├── imgs │ │ │ │ └── online.experience │ │ │ │ │ ├── EmbedOutlined-active.svg │ │ │ │ │ ├── EmbedOutlined.svg │ │ │ │ │ ├── GradientGitHub.svg │ │ │ │ │ ├── PdfOutlined-active.svg │ │ │ │ │ ├── PdfOutlined.svg │ │ │ │ │ ├── UploadingOutlined.svg │ │ │ │ │ └── file-upload-bg.svg │ │ │ ├── pdf │ │ │ │ ├── comingSoonLayer.svg │ │ │ │ ├── exitFullScreen.svg │ │ │ │ ├── extractor-formula.svg │ │ │ │ ├── extractor-hidden-layer.svg │ │ │ │ ├── extractor-pdf.svg │ │ │ │ ├── extractor-queue.svg │ │ │ │ ├── extractor-show-layer.svg │ │ │ │ ├── extractor-table.svg │ │ │ │ ├── fullScreen.svg │ │ │ │ ├── github.svg │ │ │ │ ├── guideTools.svg │ │ │ │ ├── label-llm.svg │ │ │ │ ├── labelU.svg │ │ │ │ ├── lang-change.svg │ │ │ │ ├── odl-logo.svg │ │ │ │ ├── pdf-upload-item-1.svg │ │ │ │ ├── pdf-upload-item-2.svg │ │ │ │ ├── pdf-upload-item-3.svg │ │ │ │ └── pdf-upload.png │ │ │ ├── react.svg │ │ │ └── svg │ │ │ │ ├── empty.svg │ │ │ │ └── logo.svg │ │ ├── components │ │ │ ├── SaveStatus.tsx │ │ │ ├── code-mirror │ │ │ │ ├── index.module.scss │ │ │ │ └── index.tsx │ │ │ ├── error-boundary.tsx │ │ │ ├── icon-font.tsx │ │ │ ├── loading-animation │ │ │ │ ├── index.tsx │ │ │ │ └── loadingAnimation.module.scss │ │ │ ├── text-tooltip │ │ │ │ ├── 
index.module.scss │ │ │ │ └── index.tsx │ │ │ └── upload │ │ │ │ └── index.tsx │ │ ├── constant │ │ │ ├── event.ts │ │ │ ├── index.tsx │ │ │ ├── pdf-color-picker.ts │ │ │ ├── route.ts │ │ │ └── storage.ts │ │ ├── context │ │ │ ├── language-provider.tsx │ │ │ └── query-provider.tsx │ │ ├── index.css │ │ ├── locale │ │ │ ├── common │ │ │ │ ├── en.json │ │ │ │ └── zh.json │ │ │ ├── en.json │ │ │ ├── side │ │ │ │ ├── en.ts │ │ │ │ └── zh.ts │ │ │ └── zh.json │ │ ├── main.tsx │ │ ├── pages │ │ │ ├── extract-side │ │ │ │ ├── index.module.scss │ │ │ │ └── index.tsx │ │ │ ├── extract │ │ │ │ ├── components │ │ │ │ │ ├── extractor-guide │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── extractor-lang │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── extractor-queue │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── extractor-repo │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── iframe-loading │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── image-layer-viwer │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── latex-renderer │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── loading-icon │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── md-viewer │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ ├── index.tsx │ │ │ │ │ │ └── test.md │ │ │ │ │ ├── pdf-extraction │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── pdf-upload-button │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── pdf-upload │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── pdf-viewer │ │ │ │ │ │ └── index.tsx │ │ │ │ │ └── url-markdown │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ ├── extractor-home │ │ │ │ │ ├── index.module.scss │ │ │ │ │ └── index.tsx │ │ │ │ ├── formula │ │ │ │ │ ├── formula-detail-left │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── formula-detail-right │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── formula-detail │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── formula-popover │ │ │ │ │ │ ├── 
index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── formula-upload │ │ │ │ │ │ ├── index.module.scss │ │ │ │ │ │ └── index.tsx │ │ │ │ │ ├── index.module.scss │ │ │ │ │ └── index.tsx │ │ │ │ ├── index.module.scss │ │ │ │ ├── index.tsx │ │ │ │ └── table │ │ │ │ │ ├── index.tsx │ │ │ │ │ └── table-detail │ │ │ │ │ └── index.tsx │ │ │ ├── home.module.scss │ │ │ └── home.tsx │ │ ├── routes │ │ │ └── index.tsx │ │ ├── store │ │ │ ├── jobProgress.ts │ │ │ ├── languageStore.ts │ │ │ └── mdStore.ts │ │ ├── styles │ │ │ └── variable.scss │ │ ├── types │ │ │ └── extract-task-type.ts │ │ ├── utils │ │ │ ├── download.ts │ │ │ ├── locale.ts │ │ │ └── windowOpen.ts │ │ └── vite-env.d.ts │ ├── tailwind.config.js │ ├── tsconfig.app.json │ ├── tsconfig.json │ ├── tsconfig.node.json │ └── vite.config.ts ├── web_api │ ├── Dockerfile │ ├── README.md │ ├── app.py │ ├── download_models.py │ ├── entrypoint.sh │ ├── magic-pdf.json │ └── requirements.txt └── web_demo │ ├── README.md │ ├── README_zh-CN.md │ ├── images │ └── web_demo_1.png │ ├── poetry.lock │ ├── pyproject.toml │ ├── requirements.txt │ ├── tests │ └── __init__.py │ └── web_demo │ ├── __init__.py │ ├── api │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── analysis_view.py │ │ ├── ext.py │ │ ├── formula_ext.py │ │ ├── img_md_view.py │ │ ├── markdown_view.py │ │ ├── models.py │ │ ├── pdf_ext.py │ │ ├── serialization.py │ │ ├── task_view.py │ │ └── upload_view.py │ ├── extentions.py │ └── react_app │ │ ├── __init__.py │ │ └── react_app_view.py │ ├── app.py │ ├── common │ ├── __init__.py │ ├── custom_response.py │ ├── error_types.py │ ├── ext.py │ ├── import_models.py │ ├── logger.py │ ├── mk_markdown │ │ ├── __init__.py │ │ ├── libs │ │ │ ├── __init__.py │ │ │ ├── language.py │ │ │ ├── markdown_utils.py │ │ │ └── ocr_content_type.py │ │ ├── mk_markdown.py │ │ └── resources │ │ │ └── fasttext-langdetect │ │ │ └── lid.176.ftz │ └── web_hook.py │ ├── config │ ├── __init__.py │ ├── config.yaml │ └── mineru_web.db │ 
└── static │ └── __init__.py ├── requirements-qa.txt ├── requirements.txt ├── scripts ├── download_models.py └── download_models_hf.py ├── setup.py ├── signatures └── version1 │ └── cla.json ├── tests ├── clean_coverage.py ├── get_coverage.py ├── retry_env.sh ├── test_cli │ ├── conf │ │ ├── __init__py │ │ └── conf.py │ ├── conftest.py │ ├── lib │ │ ├── __init__.py │ │ ├── calculate_score.py │ │ ├── common.py │ │ ├── pre_clean.py │ │ └── scoring.py │ ├── magic-pdf.json │ ├── pdf_dev │ │ ├── doc │ │ │ └── test_mineru.docx │ │ ├── images │ │ │ └── docstructbench.jpg │ │ ├── line1.jsonl │ │ ├── pdf │ │ │ └── test_rearch_report.pdf │ │ ├── ppt │ │ │ └── small.pptx │ │ ├── result.json │ │ └── test_model.json │ ├── test_bench.py │ ├── test_bench_gpu.py │ └── test_cli_sdk.py └── unittest │ ├── test_data │ ├── __init__.py │ ├── assets │ │ ├── jsonl │ │ │ ├── test_01.jsonl │ │ │ └── test_02.jsonl │ │ ├── pdfs │ │ │ ├── test_01.pdf │ │ │ └── test_02.pdf │ │ └── pngs │ │ │ ├── test_01.png │ │ │ └── test_02.png │ ├── data_reader_writer │ │ ├── __init__.py │ │ ├── test_filebase.py │ │ ├── test_multi_bucket_s3.py │ │ └── test_s3.py │ ├── io │ │ ├── __init__.py │ │ └── test_s3.py │ ├── test_dataset.py │ ├── test_json_compressor.py │ └── test_read_api.py │ ├── test_integrations │ └── test_rag │ │ ├── assets │ │ ├── middle.json │ │ ├── one_page_with_table_image.2.pdf │ │ └── one_page_with_table_image.pdf │ │ ├── test_api.py │ │ └── test_utils.py │ ├── test_metascan_classify │ ├── test_classify.py.bak │ ├── test_commons.py.bak │ ├── test_meta_scan.py.bak │ └── test_metascan_classify_data.json │ ├── test_model │ ├── __init__.py │ ├── assets │ │ ├── test_01.model.json │ │ ├── test_01.pdf │ │ ├── test_02.model.json │ │ └── test_02.pdf │ └── test_magic_model.py │ ├── test_table │ ├── assets │ │ └── table.jpg │ └── test_rapidtable.py │ ├── test_tools │ ├── __init__.py │ ├── assets │ │ ├── cli │ │ │ ├── path │ │ │ │ ├── cli_test_01.pdf │ │ │ │ └── cli_test_02.pdf │ │ │ └── pdf │ │ │ │ └── 
cli_test_01.pdf │ │ ├── cli_dev │ │ │ ├── cli_test_01.jsonl │ │ │ ├── cli_test_01.model.json │ │ │ └── cli_test_01.pdf │ │ └── common │ │ │ └── cli_test_01.pdf │ ├── test_cli.py │ ├── test_cli_dev.py │ └── test_common.py │ └── test_unit.py └── update_version.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.js linguist-vendored 2 | *.mjs linguist-vendored 3 | *.html linguist-documentation 4 | *.css linguist-vendored 5 | *.scss linguist-vendored -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: 🙏 Q&A 4 | url: https://github.com/opendatalab/MinerU/discussions/categories/q-a 5 | about: Ask the community for help 6 | - name: 💡 Feature requests and ideas 7 | url: https://github.com/opendatalab/MinerU/discussions/categories/ideas 8 | about: Share ideas for new features 9 | - name: 🙌 Show and tell 10 | url: https://github.com/opendatalab/MinerU/discussions/categories/show-and-tell 11 | about: Show off something you've made -------------------------------------------------------------------------------- /.github/workflows/rerun.yml: -------------------------------------------------------------------------------- 1 | name: check-status 2 | 3 | on: 4 | workflow_run: 5 | workflows: [ci] 6 | types: [completed] 7 | 8 | jobs: 9 | on-failure: 10 | runs-on: pdf 11 | permissions: 12 | actions: write 13 | if: ${{ (github.event.workflow_run.head_branch == 'master') && github.event.workflow_run.conclusion == 'failure' && github.event.workflow_run.run_attempt < 3 }} 14 | steps: 15 | - run: | 16 | echo 'The triggering workflow failed' 17 | sleep 600 18 | curl -L \ 19 | -X POST \ 20 | -H "Accept: application/vnd.github+json" \ 21 | -H "Authorization: Bearer ${{ github.token }}" \ 22 | -H "X-GitHub-Api-Version: 2022-11-28" \ 
23 | https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.event.workflow_run.id }}/rerun-failed-jobs 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.tar 2 | *.tar.gz 3 | *.zip 4 | venv*/ 5 | envs/ 6 | slurm_logs/ 7 | 8 | sync1.sh 9 | data_preprocess_pj1 10 | data-preparation1 11 | __pycache__ 12 | *.log 13 | *.pyc 14 | .vscode 15 | debug/ 16 | *.ipynb 17 | .idea 18 | 19 | # vscode history 20 | .history 21 | 22 | .DS_Store 23 | .env 24 | 25 | bad_words/ 26 | bak/ 27 | 28 | app/tests/* 29 | temp/ 30 | tmp/ 31 | tmp 32 | .vscode 33 | .vscode/ 34 | ocr_demo 35 | .coveragerc 36 | /app/common/__init__.py 37 | /magic_pdf/config/__init__.py 38 | source.dev.env 39 | 40 | tmp 41 | 42 | projects/web/node_modules 43 | projects/web/dist 44 | 45 | projects/web_demo/web_demo/static/ 46 | cli_debug/ 47 | debug_utils/ 48 | 49 | # sphinx docs 50 | _build/ 51 | 52 | 53 | output/ -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.10" 7 | 8 | formats: 9 | - epub 10 | 11 | python: 12 | install: 13 | - requirements: next_docs/zh_cn/requirements.txt 14 | 15 | sphinx: 16 | configuration: next_docs/zh_cn/conf.py 17 | -------------------------------------------------------------------------------- /demo/batch_demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from magic_pdf.data.batch_build_dataset import batch_build_dataset 4 | from magic_pdf.tools.common import batch_do_parse 5 | 6 | 7 | def batch(pdf_dir, output_dir, method, lang): 8 | os.makedirs(output_dir, exist_ok=True) 9 | doc_paths = [] 10 | for doc_path in Path(pdf_dir).glob('*'): 11 
| if doc_path.suffix == '.pdf': 12 | doc_paths.append(doc_path) 13 | 14 | # build dataset with 2 workers 15 | datasets = batch_build_dataset(doc_paths, 4, lang) 16 | 17 | # os.environ["MINERU_MIN_BATCH_INFERENCE_SIZE"] = "200" # every 200 pages will be parsed in one batch 18 | batch_do_parse(output_dir, [str(doc_path.stem) for doc_path in doc_paths], datasets, method) 19 | 20 | 21 | if __name__ == '__main__': 22 | batch("pdfs", "output", "auto", "") 23 | 24 | -------------------------------------------------------------------------------- /demo/pdfs/demo1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/demo/pdfs/demo1.pdf -------------------------------------------------------------------------------- /demo/pdfs/demo2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/demo/pdfs/demo2.pdf -------------------------------------------------------------------------------- /demo/pdfs/demo3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/demo/pdfs/demo3.pdf -------------------------------------------------------------------------------- /demo/pdfs/small_ocr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/demo/pdfs/small_ocr.pdf -------------------------------------------------------------------------------- /docs/chemical_knowledge_introduction/introduction.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/chemical_knowledge_introduction/introduction.pdf -------------------------------------------------------------------------------- /docs/chemical_knowledge_introduction/introduction.xmind: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/chemical_knowledge_introduction/introduction.xmind -------------------------------------------------------------------------------- /docs/images/MinerU-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/MinerU-logo.png -------------------------------------------------------------------------------- /docs/images/datalab_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/datalab_logo.png -------------------------------------------------------------------------------- /docs/images/flowchart_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/flowchart_en.png -------------------------------------------------------------------------------- /docs/images/flowchart_zh_cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/flowchart_zh_cn.png -------------------------------------------------------------------------------- /docs/images/layout_example.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/layout_example.png -------------------------------------------------------------------------------- /docs/images/poly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/poly.png -------------------------------------------------------------------------------- /docs/images/project_panorama_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/project_panorama_en.png -------------------------------------------------------------------------------- /docs/images/project_panorama_zh_cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/project_panorama_zh_cn.png -------------------------------------------------------------------------------- /docs/images/spans_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/spans_example.png -------------------------------------------------------------------------------- /docs/images/web_demo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/docs/images/web_demo_1.png -------------------------------------------------------------------------------- /magic_pdf/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/__init__.py -------------------------------------------------------------------------------- /magic_pdf/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/config/__init__.py -------------------------------------------------------------------------------- /magic_pdf/config/drop_tag.py: -------------------------------------------------------------------------------- 1 | 2 | COLOR_BG_HEADER_TXT_BLOCK = 'color_background_header_txt_block' 3 | PAGE_NO = 'page-no' # 页码 4 | CONTENT_IN_FOOT_OR_HEADER = 'in-foot-header-area' # 页眉页脚内的文本 5 | VERTICAL_TEXT = 'vertical-text' # 垂直文本 6 | ROTATE_TEXT = 'rotate-text' # 旋转文本 7 | EMPTY_SIDE_BLOCK = 'empty-side-block' # 边缘上的空白没有任何内容的block 8 | ON_IMAGE_TEXT = 'on-image-text' # 文本在图片上 9 | ON_TABLE_TEXT = 'on-table-text' # 文本在表格上 10 | 11 | 12 | class DropTag: 13 | PAGE_NUMBER = 'page_no' 14 | HEADER = 'header' 15 | FOOTER = 'footer' 16 | FOOTNOTE = 'footnote' 17 | NOT_IN_LAYOUT = 'not_in_layout' 18 | SPAN_OVERLAP = 'span_overlap' 19 | BLOCK_OVERLAP = 'block_overlap' 20 | -------------------------------------------------------------------------------- /magic_pdf/config/enums.py: -------------------------------------------------------------------------------- 1 | 2 | import enum 3 | 4 | 5 | class SupportedPdfParseMethod(enum.Enum): 6 | OCR = 'ocr' 7 | TXT = 'txt' 8 | -------------------------------------------------------------------------------- /magic_pdf/config/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | class FileNotExisted(Exception): 3 | 4 | def __init__(self, path): 5 | self.path = path 6 | 7 | def __str__(self): 8 | return f'File {self.path} does not exist.' 
9 | 10 | 11 | class InvalidConfig(Exception): 12 | def __init__(self, msg): 13 | self.msg = msg 14 | 15 | def __str__(self): 16 | return f'Invalid config: {self.msg}' 17 | 18 | 19 | class InvalidParams(Exception): 20 | def __init__(self, msg): 21 | self.msg = msg 22 | 23 | def __str__(self): 24 | return f'Invalid params: {self.msg}' 25 | 26 | 27 | class EmptyData(Exception): 28 | def __init__(self, msg): 29 | self.msg = msg 30 | 31 | def __str__(self): 32 | return f'Empty data: {self.msg}' 33 | 34 | class CUDA_NOT_AVAILABLE(Exception): 35 | def __init__(self, msg): 36 | self.msg = msg 37 | 38 | def __str__(self): 39 | return f'CUDA not available: {self.msg}' -------------------------------------------------------------------------------- /magic_pdf/config/make_content_config.py: -------------------------------------------------------------------------------- 1 | class MakeMode: 2 | MM_MD = 'mm_markdown' 3 | NLP_MD = 'nlp_markdown' 4 | STANDARD_FORMAT = 'standard_format' 5 | 6 | 7 | class DropMode: 8 | WHOLE_PDF = 'whole_pdf' 9 | SINGLE_PAGE = 'single_page' 10 | NONE = 'none' 11 | NONE_WITH_REASON = 'none_with_reason' 12 | -------------------------------------------------------------------------------- /magic_pdf/config/model_block_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class ModelBlockTypeEnum(Enum): 5 | TITLE = 0 6 | PLAIN_TEXT = 1 7 | ABANDON = 2 8 | ISOLATE_FORMULA = 8 9 | EMBEDDING = 13 10 | ISOLATED = 14 11 | -------------------------------------------------------------------------------- /magic_pdf/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/data/__init__.py -------------------------------------------------------------------------------- /magic_pdf/data/data_reader_writer/__init__.py: 
-------------------------------------------------------------------------------- 1 | from magic_pdf.data.data_reader_writer.filebase import \ 2 | FileBasedDataReader # noqa: F401 3 | from magic_pdf.data.data_reader_writer.filebase import \ 4 | FileBasedDataWriter # noqa: F401 5 | from magic_pdf.data.data_reader_writer.multi_bucket_s3 import \ 6 | MultiBucketS3DataReader # noqa: F401 7 | from magic_pdf.data.data_reader_writer.multi_bucket_s3 import \ 8 | MultiBucketS3DataWriter # noqa: F401 9 | from magic_pdf.data.data_reader_writer.s3 import S3DataReader # noqa: F401 10 | from magic_pdf.data.data_reader_writer.s3 import S3DataWriter # noqa: F401 11 | from magic_pdf.data.data_reader_writer.base import DataReader # noqa: F401 12 | from magic_pdf.data.data_reader_writer.base import DataWriter # noqa: F401 -------------------------------------------------------------------------------- /magic_pdf/data/io/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from magic_pdf.data.io.base import IOReader, IOWriter # noqa: F401 3 | from magic_pdf.data.io.http import HttpReader, HttpWriter # noqa: F401 4 | from magic_pdf.data.io.s3 import S3Reader, S3Writer # noqa: F401 5 | 6 | __all__ = ['IOReader', 'IOWriter', 'HttpReader', 'HttpWriter', 'S3Reader', 'S3Writer'] -------------------------------------------------------------------------------- /magic_pdf/data/schemas.py: -------------------------------------------------------------------------------- 1 | 2 | from pydantic import BaseModel, Field 3 | 4 | 5 | class S3Config(BaseModel): 6 | """S3 config 7 | """ 8 | bucket_name: str = Field(description='s3 bucket name', min_length=1) 9 | access_key: str = Field(description='s3 access key', min_length=1) 10 | secret_key: str = Field(description='s3 secret key', min_length=1) 11 | endpoint_url: str = Field(description='s3 endpoint url', min_length=1) 12 | addressing_style: str = Field(description='s3 addressing style', default='auto', 
min_length=1) 13 | 14 | 15 | class PageInfo(BaseModel): 16 | """The width and height of page 17 | """ 18 | w: float = Field(description='the width of page') 19 | h: float = Field(description='the height of page') 20 | -------------------------------------------------------------------------------- /magic_pdf/dict2md/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/dict2md/__init__.py -------------------------------------------------------------------------------- /magic_pdf/integrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/integrations/__init__.py -------------------------------------------------------------------------------- /magic_pdf/integrations/rag/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/integrations/rag/__init__.py -------------------------------------------------------------------------------- /magic_pdf/libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/libs/__init__.py -------------------------------------------------------------------------------- /magic_pdf/libs/clean_memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Opendatalab. All rights reserved. 
2 | import torch 3 | import gc 4 | 5 | 6 | def clean_memory(device='cuda'): 7 | if device == 'cuda': 8 | if torch.cuda.is_available(): 9 | torch.cuda.empty_cache() 10 | torch.cuda.ipc_collect() 11 | elif str(device).startswith("npu"): 12 | import torch_npu 13 | if torch_npu.npu.is_available(): 14 | torch_npu.npu.empty_cache() 15 | elif str(device).startswith("mps"): 16 | torch.mps.empty_cache() 17 | gc.collect() -------------------------------------------------------------------------------- /magic_pdf/libs/convert_utils.py: -------------------------------------------------------------------------------- 1 | def dict_to_list(input_dict): 2 | items_list = [] 3 | for _, item in input_dict.items(): 4 | items_list.append(item) 5 | return items_list 6 | -------------------------------------------------------------------------------- /magic_pdf/libs/coordinate_transform.py: -------------------------------------------------------------------------------- 1 | def get_scale_ratio(model_page_info, page): 2 | pix = page.get_pixmap(dpi=72) 3 | pymu_width = int(pix.w) 4 | pymu_height = int(pix.h) 5 | width_from_json = model_page_info['page_info']['width'] 6 | height_from_json = model_page_info['page_info']['height'] 7 | horizontal_scale_ratio = width_from_json / pymu_width 8 | vertical_scale_ratio = height_from_json / pymu_height 9 | return horizontal_scale_ratio, vertical_scale_ratio 10 | -------------------------------------------------------------------------------- /magic_pdf/libs/hash_utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | 4 | def compute_md5(file_bytes): 5 | hasher = hashlib.md5() 6 | hasher.update(file_bytes) 7 | return hasher.hexdigest().upper() 8 | 9 | 10 | def compute_sha256(input_string): 11 | hasher = hashlib.sha256() 12 | # 在Python3中,需要将字符串转化为字节对象才能被哈希函数处理 13 | input_bytes = input_string.encode('utf-8') 14 | hasher.update(input_bytes) 15 | return hasher.hexdigest() 16 | 
-------------------------------------------------------------------------------- /magic_pdf/libs/json_compressor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import brotli 3 | import base64 4 | 5 | class JsonCompressor: 6 | 7 | @staticmethod 8 | def compress_json(data): 9 | """ 10 | Compress a json object and encode it with base64 11 | """ 12 | json_str = json.dumps(data) 13 | json_bytes = json_str.encode('utf-8') 14 | compressed = brotli.compress(json_bytes, quality=6) 15 | compressed_str = base64.b64encode(compressed).decode('utf-8') # convert bytes to string 16 | return compressed_str 17 | 18 | @staticmethod 19 | def decompress_json(compressed_str): 20 | """ 21 | Decode the base64 string and decompress the json object 22 | """ 23 | compressed = base64.b64decode(compressed_str.encode('utf-8')) # convert string to bytes 24 | decompressed_bytes = brotli.decompress(compressed) 25 | json_str = decompressed_bytes.decode('utf-8') 26 | data = json.loads(json_str) 27 | return data 28 | -------------------------------------------------------------------------------- /magic_pdf/libs/local_math.py: -------------------------------------------------------------------------------- 1 | def float_gt(a, b): 2 | if 0.0001 >= abs(a -b): 3 | return False 4 | return a > b 5 | 6 | def float_equal(a, b): 7 | if 0.0001 >= abs(a-b): 8 | return True 9 | return False -------------------------------------------------------------------------------- /magic_pdf/libs/markdown_utils.py: -------------------------------------------------------------------------------- 1 | 2 | def ocr_escape_special_markdown_char(content): 3 | """ 4 | 转义正文里对markdown语法有特殊意义的字符 5 | """ 6 | special_chars = ["*", "`", "~", "$"] 7 | for char in special_chars: 8 | content = content.replace(char, "\\" + char) 9 | 10 | return content 11 | -------------------------------------------------------------------------------- /magic_pdf/libs/safe_filename.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def sanitize_filename(filename, replacement="_"): 5 | if os.name == 'nt': 6 | invalid_chars = '<>:"|?*' 7 | 8 | for char in invalid_chars: 9 | filename = filename.replace(char, replacement) 10 | 11 | return filename 12 | -------------------------------------------------------------------------------- /magic_pdf/libs/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.3.12" 2 | -------------------------------------------------------------------------------- /magic_pdf/model/__init__.py: -------------------------------------------------------------------------------- 1 | __use_inside_model__ = True 2 | __model_mode__ = 'full' -------------------------------------------------------------------------------- /magic_pdf/model/model_list.py: -------------------------------------------------------------------------------- 1 | class MODEL: 2 | Paddle = "pp_structure_v2" 3 | PEK = "pdf_extract_kit" 4 | 5 | 6 | class AtomicModel: 7 | Layout = "layout" 8 | MFD = "mfd" 9 | MFR = "mfr" 10 | OCR = "ocr" 11 | Table = "table" 12 | LangDetect = "langdetect" 13 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/language_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Opendatalab. All rights reserved. 
2 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Opendatalab. All rights reserved. 2 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/layout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/layout/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import ( 2 | LayoutLMv3Config, 3 | LayoutLMv3ForTokenClassification, 4 | LayoutLMv3ForQuestionAnswering, 5 | LayoutLMv3ForSequenceClassification, 6 | LayoutLMv3Tokenizer, 7 | ) 8 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py: 
-------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .data_collator import DataCollatorForKeyValueExtraction 3 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .layoutlmv3 import ( 2 | LayoutLMv3Config, 3 | LayoutLMv3ForTokenClassification, 4 | LayoutLMv3ForQuestionAnswering, 5 | LayoutLMv3ForSequenceClassification, 6 | LayoutLMv3Tokenizer, 7 | ) 8 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/mfd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/mfd/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/mfd/yolov8/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/mfd/yolov8/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/mfr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/mfr/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/mfr/unimernet/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/mfr/unimernet/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py: -------------------------------------------------------------------------------- 1 | from .unimer_swin import UnimerSwinConfig, UnimerSwinModel, UnimerSwinImageProcessor 2 | from .unimer_mbart import UnimerMBartConfig, UnimerMBartModel, UnimerMBartForCausalLM 3 | from .modeling_unimernet import UnimernetModel 4 | 5 | __all__ = [ 6 | "UnimerSwinConfig", 7 | "UnimerSwinModel", 8 | "UnimerSwinImageProcessor", 9 | "UnimerMBartConfig", 10 | "UnimerMBartModel", 11 | "UnimerMBartForCausalLM", 12 | "UnimernetModel", 13 | ] 14 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_unimer_mbart import UnimerMBartConfig 2 | from .modeling_unimer_mbart import UnimerMBartModel, UnimerMBartForCausalLM 3 | 4 | __all__ = [ 5 | "UnimerMBartConfig", 6 | "UnimerMBartModel", 7 | "UnimerMBartForCausalLM", 8 | ] 9 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .configuration_unimer_swin import UnimerSwinConfig 2 | from .modeling_unimer_swin import UnimerSwinModel 3 | from .image_processing_unimer_swin import UnimerSwinImageProcessor 4 | 5 | __all__ = [ 6 | "UnimerSwinConfig", 7 | "UnimerSwinModel", 8 | "UnimerSwinImageProcessor", 9 | ] 10 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/ocr/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Opendatalab. All rights reserved. 
2 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from .imaug import transform, create_operators 7 | 8 | 9 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import copy 16 | 17 | __all__ = ["build_model"] 18 | 19 | 20 | def build_model(config, **kwargs): 21 | from .base_model import BaseModel 22 | 23 | config = copy.deepcopy(config) 24 | module_class = BaseModel(config, **kwargs) 25 | return module_class 26 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class ClsHead(nn.Module): 7 | """ 8 | Class orientation 9 | Args: 10 | params(dict): super parameters for build Class network 11 | """ 12 | 13 | def __init__(self, in_channels, class_dim, **kwargs): 14 | super(ClsHead, self).__init__() 15 | self.pool = nn.AdaptiveAvgPool2d(1) 16 | self.fc = nn.Linear(in_channels, class_dim, bias=True) 17 | 18 | def forward(self, x): 19 | x = self.pool(x) 20 | x = torch.reshape(x, shape=[x.shape[0], x.shape[1]]) 21 | x = self.fc(x) 22 | x = F.softmax(x, dim=1) 23 | return x 24 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ClsPostProcess(object): 5 | """ Convert between text-label and text-index """ 6 | 7 | def __init__(self, label_list, **kwargs): 8 | 
super(ClsPostProcess, self).__init__() 9 | self.label_list = label_list 10 | 11 | def __call__(self, preds, label=None, *args, **kwargs): 12 | if isinstance(preds, torch.Tensor): 13 | preds = preds.cpu().numpy() 14 | pred_idxs = preds.argmax(axis=1) 15 | decode_out = [(self.label_list[idx], preds[i, idx]) 16 | for i, idx in enumerate(pred_idxs)] 17 | if label is None: 18 | return decode_out 19 | label = [(self.label_list[idx], 1.0) for idx in label] 20 | return decode_out, label -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | : 12 | ; 13 | < 14 | = 15 | > 16 | ? 17 | @ 18 | A 19 | B 20 | C 21 | D 22 | E 23 | F 24 | G 25 | H 26 | I 27 | J 28 | K 29 | L 30 | M 31 | N 32 | O 33 | P 34 | Q 35 | R 36 | S 37 | T 38 | U 39 | V 40 | W 41 | X 42 | Y 43 | Z 44 | [ 45 | \ 46 | ] 47 | ^ 48 | _ 49 | ` 50 | a 51 | b 52 | c 53 | d 54 | e 55 | f 56 | g 57 | h 58 | i 59 | j 60 | k 61 | l 62 | m 63 | n 64 | o 65 | p 66 | q 67 | r 68 | s 69 | t 70 | u 71 | v 72 | w 73 | x 74 | y 75 | z 76 | { 77 | | 78 | } 79 | ~ 80 | ! 81 | " 82 | # 83 | $ 84 | % 85 | & 86 | ' 87 | ( 88 | ) 89 | * 90 | + 91 | , 92 | - 93 | . 
94 | / 95 | 96 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Opendatalab. All rights reserved. -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Opendatalab. All rights reserved. 2 | -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/reading_oreder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/reading_oreder/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/table/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/table/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/table/rapidtable/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/model/sub_modules/table/rapidtable/__init__.py -------------------------------------------------------------------------------- /magic_pdf/model/sub_modules/table/table_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def minify_html(html): 5 | # 移除多余的空白字符 6 | html = re.sub(r'\s+', ' ', html) 7 | # 移除行尾的空白字符 8 | html = re.sub(r'\s*>\s*', '>', html) 9 | # 移除标签前的空白字符 10 | html = re.sub(r'\s*<\s*', '<', html) 11 | return html.strip() -------------------------------------------------------------------------------- /magic_pdf/post_proc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Opendatalab. All rights reserved. 2 | -------------------------------------------------------------------------------- /magic_pdf/pre_proc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/pre_proc/__init__.py -------------------------------------------------------------------------------- /magic_pdf/pre_proc/construct_page_dict.py: -------------------------------------------------------------------------------- 1 | 2 | def ocr_construct_page_component_v2(blocks, layout_bboxes, page_id, page_w, page_h, layout_tree, 3 | images, tables, interline_equations, discarded_blocks, need_drop, drop_reason): 4 | return_dict = { 5 | 'preproc_blocks': blocks, 6 | 'layout_bboxes': layout_bboxes, 7 | 'page_idx': page_id, 8 | 'page_size': [page_w, page_h], 9 | '_layout_tree': layout_tree, 10 | 'images': images, 11 | 'tables': tables, 12 | 'interline_equations': interline_equations, 13 | 'discarded_blocks': discarded_blocks, 14 | 'need_drop': need_drop, 15 | 'drop_reason': drop_reason, 16 | } 17 | return return_dict 18 | 
-------------------------------------------------------------------------------- /magic_pdf/resources/fasttext-langdetect/lid.176.ftz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/resources/fasttext-langdetect/lid.176.ftz -------------------------------------------------------------------------------- /magic_pdf/resources/model_config/model_configs.yaml: -------------------------------------------------------------------------------- 1 | weights: 2 | layoutlmv3: Layout/LayoutLMv3/model_final.pth 3 | doclayout_yolo: Layout/YOLO/doclayout_yolo_docstructbench_imgsz1280_2501.pt 4 | yolo_v8_mfd: MFD/YOLO/yolo_v8_ft.pt 5 | unimernet_small: MFR/unimernet_hf_small_2503 6 | struct_eqtable: TabRec/StructEqTable 7 | tablemaster: TabRec/TableMaster 8 | rapid_table: TabRec/RapidTable -------------------------------------------------------------------------------- /magic_pdf/resources/slanet_plus/slanet-plus.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/resources/slanet_plus/slanet-plus.onnx -------------------------------------------------------------------------------- /magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt -------------------------------------------------------------------------------- /magic_pdf/spark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/magic_pdf/spark/__init__.py 
def ImportPIL(f):
    """Decorator that checks Pillow is importable, then returns ``f`` as-is.

    The check runs once, when the decorator is applied. ``f`` is not
    wrapped; the very same object is handed back.

    Args:
        f: The callable being decorated.

    Returns:
        ``f`` itself when ``import PIL`` succeeds.

    Raises:
        SystemExit: With status 1 when Pillow cannot be imported; an error
            message is logged first.
    """
    try:
        import PIL  # noqa: F401
    except ImportError:
        logger.error('Pillow not installed, please install by pip.')
        # Raise SystemExit directly instead of calling the bare ``exit``
        # helper: that name is injected by the ``site`` module for
        # interactive sessions and is undefined under ``python -S`` or in
        # some embedded/frozen interpreters.
        raise SystemExit(1)
    return f
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /next_docs/en/_static/image/MinerU-logo-hq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/MinerU-logo-hq.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/MinerU-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/MinerU-logo.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/datalab_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/datalab_logo.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/flowchart_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/flowchart_en.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/flowchart_zh_cn.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/flowchart_zh_cn.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/inference_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/inference_result.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/layout_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/layout_example.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/logo.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/poly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/poly.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/project_panorama_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/project_panorama_en.png 
-------------------------------------------------------------------------------- /next_docs/en/_static/image/project_panorama_zh_cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/project_panorama_zh_cn.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/spans_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/spans_example.png -------------------------------------------------------------------------------- /next_docs/en/_static/image/web_demo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/en/_static/image/web_demo_1.png -------------------------------------------------------------------------------- /next_docs/en/additional_notes/glossary.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Glossary 4 | =========== 5 | 6 | 1. jsonl 7 | A newline-delimited (\n) format in which each line must be a valid, independent JSON object. 8 | Currently, all functions shipped with **MinerU** assume that each JSON object contains a field named either **path** or **file_location**. 9 | 10 | 11 | 2. magic-pdf.json 12 | TODO 13 | 14 | 15 | -------------------------------------------------------------------------------- /next_docs/en/api.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
toctree:: 3 | :maxdepth: 2 4 | 5 | api/dataset 6 | api/data_reader_writer 7 | api/read_api 8 | api/schemas 9 | api/io 10 | api/pipe_operators 11 | api/model_operators -------------------------------------------------------------------------------- /next_docs/en/api/dataset.rst: -------------------------------------------------------------------------------- 1 | Dataset 2 | ======== 3 | 4 | .. autoclass:: magic_pdf.data.dataset.PageableData 5 | :members: 6 | :inherited-members: 7 | :show-inheritance: 8 | 9 | 10 | .. autoclass:: magic_pdf.data.dataset.Dataset 11 | :members: 12 | :inherited-members: 13 | :show-inheritance: 14 | 15 | .. autoclass:: magic_pdf.data.dataset.ImageDataset 16 | :members: 17 | :inherited-members: 18 | :show-inheritance: 19 | 20 | .. autoclass:: magic_pdf.data.dataset.PymuDocDataset 21 | :members: 22 | :inherited-members: 23 | :show-inheritance: 24 | 25 | .. autoclass:: magic_pdf.data.dataset.Doc 26 | :members: 27 | :inherited-members: 28 | :show-inheritance: 29 | -------------------------------------------------------------------------------- /next_docs/en/api/io.rst: -------------------------------------------------------------------------------- 1 | IO 2 | == 3 | 4 | .. autoclass:: magic_pdf.data.io.base.IOReader 5 | :members: 6 | :inherited-members: 7 | :show-inheritance: 8 | 9 | .. autoclass:: magic_pdf.data.io.base.IOWriter 10 | :members: 11 | :inherited-members: 12 | :show-inheritance: 13 | 14 | .. autoclass:: magic_pdf.data.io.s3.S3Reader 15 | :members: 16 | :inherited-members: 17 | :show-inheritance: 18 | 19 | .. autoclass:: magic_pdf.data.io.s3.S3Writer 20 | :members: 21 | :inherited-members: 22 | :show-inheritance: 23 | 24 | .. autoclass:: magic_pdf.data.io.http.HttpReader 25 | :members: 26 | :inherited-members: 27 | :show-inheritance: 28 | 29 | .. 
autoclass:: magic_pdf.data.io.http.HttpWriter 30 | :members: 31 | :inherited-members: 32 | :show-inheritance: 33 | 34 | -------------------------------------------------------------------------------- /next_docs/en/api/model_operators.rst: -------------------------------------------------------------------------------- 1 | 2 | Model Api 3 | ========== 4 | 5 | .. autoclass:: magic_pdf.operators.InferenceResultBase 6 | :members: 7 | :inherited-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /next_docs/en/api/pipe_operators.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Pipeline Api 4 | ============= 5 | 6 | .. autoclass:: magic_pdf.operators.pipes.PipeResult 7 | :members: 8 | :inherited-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /next_docs/en/api/read_api.rst: -------------------------------------------------------------------------------- 1 | read_api 2 | ========= 3 | 4 | .. automodule:: magic_pdf.data.read_api 5 | :members: 6 | :inherited-members: 7 | -------------------------------------------------------------------------------- /next_docs/en/api/schemas.rst: -------------------------------------------------------------------------------- 1 | 2 | schemas 3 | =========== 4 | 5 | .. autopydantic_model:: magic_pdf.data.schemas.S3Config 6 | :members: 7 | 8 | .. autopydantic_model:: magic_pdf.data.schemas.PageInfo 9 | :members: 10 | 11 | -------------------------------------------------------------------------------- /next_docs/en/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /next_docs/en/user_guide.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | .. toctree:: 4 | :maxdepth: 2 5 | 6 | user_guide/install 7 | user_guide/usage 8 | user_guide/quick_start 9 | user_guide/tutorial 10 | user_guide/data 11 | user_guide/inference_result 12 | user_guide/pipe_result 13 | -------------------------------------------------------------------------------- /next_docs/en/user_guide/data.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Data 4 | ========= 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | data/dataset 10 | 11 | data/read_api 12 | 13 | data/data_reader_writer 14 | 15 | data/io 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /next_docs/en/user_guide/data/io.rst: -------------------------------------------------------------------------------- 1 | 2 | IO 3 | === 4 | 5 | The IO classes read and write bytes from and to different media. Currently we provide ``S3Reader`` and ``S3Writer`` for AWS S3 compatible media 6 | and ``HttpReader`` and ``HttpWriter`` for remote HTTP files. 
You can implement new classes to meet the needs of your own scenarios 7 | if MinerU does not provide a suitable class. Implementing a new class is easy: the only requirement is to inherit from 8 | ``IOReader`` or ``IOWriter``. 9 | 10 | .. code:: python 11 | 12 | class SomeReader(IOReader): 13 | def read(self, path: str) -> bytes: 14 | pass 15 | 16 | def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes: 17 | pass 18 | 19 | 20 | class SomeWriter(IOWriter): 21 | def write(self, path: str, data: bytes) -> None: 22 | pass 23 | 24 | Check :doc:`../../api/io` for more details 25 | 26 | -------------------------------------------------------------------------------- /next_docs/en/user_guide/install.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation 3 | ============== 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | 8 | install/install 9 | install/boost_with_cuda 10 | install/download_model_weight_files 11 | install/config 12 | 13 | -------------------------------------------------------------------------------- /next_docs/en/user_guide/quick_start.rst: -------------------------------------------------------------------------------- 1 | 2 | Quick Start 3 | ============== 4 | 5 | Want to learn how to use MinerU in different scenarios? This page gives examples for several use cases to match your needs. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | quick_start/convert_pdf 11 | quick_start/convert_image 12 | quick_start/convert_ms_office 13 | -------------------------------------------------------------------------------- /next_docs/en/user_guide/tutorial.rst: -------------------------------------------------------------------------------- 1 | 2 | Tutorial 3 | =========== 4 | 5 | From beginning to end, this tutorial shows how to use MinerU via a minimal project 6 | 7 | .. 
toctree:: 8 | :maxdepth: 1 9 | 10 | tutorial/pipeline 11 | 12 | -------------------------------------------------------------------------------- /next_docs/en/user_guide/usage.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Usage 4 | ======== 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | usage/command_line 10 | usage/api 11 | usage/docker 12 | 13 | -------------------------------------------------------------------------------- /next_docs/en/user_guide/usage/docker.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Docker 4 | ======= 5 | 6 | .. admonition:: Important 7 | :class: tip 8 | 9 | Docker requires a GPU with at least 16GB of VRAM, and all acceleration features are enabled by default. 10 | 11 | Before running this Docker image, you can use the following command to check if your device supports CUDA acceleration on Docker. 12 | 13 | .. code-block:: bash 14 | 15 | docker run --rm --gpus=all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi 16 | 17 | 18 | .. code:: sh 19 | 20 | wget https://github.com/opendatalab/MinerU/raw/master/Dockerfile 21 | docker build -t mineru:latest . 
22 | docker run --rm -it --gpus=all mineru:latest /bin/bash 23 | magic-pdf --help 24 | 25 | -------------------------------------------------------------------------------- /next_docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.26.4 2 | click==8.1.7 3 | fast-langdetect==0.2.2 4 | Brotli==1.1.0 5 | boto3>=1.28.43 6 | loguru>=0.6.0 7 | myst-parser 8 | Pillow==8.4.0 9 | pydantic>=2.7.2,<2.8.0 10 | PyMuPDF>=1.24.9 11 | pdfminer.six==20231228 12 | sphinx 13 | sphinx-argparse>=0.5.2 14 | sphinx-book-theme>=1.1.3 15 | sphinx-copybutton>=0.5.2 16 | sphinx_rtd_theme>=3.0.1 17 | autodoc_pydantic>=2.2.0 18 | -------------------------------------------------------------------------------- /next_docs/zh_cn/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.10" 7 | 8 | formats: 9 | - epub 10 | 11 | python: 12 | install: 13 | - requirements: next_docs/requirements.txt 14 | 15 | sphinx: 16 | configuration: next_docs/zh_cn/conf.py 17 | -------------------------------------------------------------------------------- /next_docs/zh_cn/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/MinerU-logo-hq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/MinerU-logo-hq.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/MinerU-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/MinerU-logo.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/datalab_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/datalab_logo.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/flowchart_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/flowchart_en.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/flowchart_zh_cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/flowchart_zh_cn.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/inference_result.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/inference_result.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/layout_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/layout_example.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/logo.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/poly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/poly.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/project_panorama_en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/project_panorama_en.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/project_panorama_zh_cn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/project_panorama_zh_cn.png 
-------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/spans_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/spans_example.png -------------------------------------------------------------------------------- /next_docs/zh_cn/_static/image/web_demo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/next_docs/zh_cn/_static/image/web_demo_1.png -------------------------------------------------------------------------------- /next_docs/zh_cn/additional_notes/glossary.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | 名词解释 4 | =========== 5 | 6 | 1. jsonl 7 | TODO: add description 8 | 9 | 2. magic-pdf.json 10 | TODO: add description 11 | 12 | -------------------------------------------------------------------------------- /next_docs/zh_cn/additional_notes/known_issues.rst: -------------------------------------------------------------------------------- 1 | 已知问题 2 | ============ 3 | 4 | - 阅读顺序基于模型对可阅读内容在空间中的分布进行排序,在极端复杂的排版下可能会部分区域乱序 5 | - 不支持竖排文字 6 | - 目录和列表通过规则进行识别,少部分不常见的列表形式可能无法识别 7 | - 标题只有一级,目前不支持标题分级 8 | - 代码块在layout模型里还没有支持 9 | - 漫画书、艺术图册、小学教材、习题尚不能很好解析 10 | - 表格识别在复杂表格上可能会出现行/列识别错误 11 | - 在小语种PDF上,OCR识别可能会出现字符不准确的情况(如拉丁文的重音符号、阿拉伯文易混淆字符等) 12 | - 部分公式可能会无法在markdown中渲染 13 | 14 | -------------------------------------------------------------------------------- /next_docs/zh_cn/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /next_docs/zh_cn/user_guide.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | .. toctree:: 4 | :maxdepth: 2 5 | 6 | user_guide/install 7 | user_guide/quick_start 8 | user_guide/tutorial 9 | user_guide/data 10 | 11 | -------------------------------------------------------------------------------- /next_docs/zh_cn/user_guide/data.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | 数据 4 | ========= 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | :caption: 数据 9 | 10 | data/dataset 11 | 12 | data/read_api 13 | 14 | data/data_reader_writer 15 | 16 | data/io 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /next_docs/zh_cn/user_guide/data/dataset.rst: -------------------------------------------------------------------------------- 1 | 2 | 数据集 3 | ====== 4 | 5 | 导入数据类 6 | ----------- 7 | 8 | 数据集 9 | ^^^^^^^^ 10 | 11 | 每个 PDF 或图像将形成一个 Dataset。众所周知,PDF 有两种类别::ref:`TXT <digital_method_section>` 或 :ref:`OCR <ocr_method_section>` 方法部分。从图像中可以获得 ImageDataset,它是 Dataset 的子类;从 PDF 文件中可以获得 PymuDocDataset。ImageDataset 和 PymuDocDataset 之间的区别在于 ImageDataset 仅支持 OCR 解析方法,而 PymuDocDataset 支持 OCR 和 TXT 两种方法。 12 | 13 | .. note:: 14 | 15 | 实际上,有些 PDF 可能是由图像生成的,这意味着它们不支持 `TXT` 方法。目前,由用户保证不会调用 `TXT` 方法来解析图像生成的 PDF 16 | 17 | PDF 解析方法 18 | --------------- 19 | 20 | .. _ocr_method_section: 21 | 22 | OCR 23 | ^^^^ 24 | 通过 光学字符识别 技术提取字符。 25 | 26 | .. _digital_method_section: 27 | 28 | TXT 29 | ^^^^^^^^ 30 | 通过第三方库提取字符,目前我们使用的是 pymupdf。 31 | 32 | -------------------------------------------------------------------------------- /next_docs/zh_cn/user_guide/data/io.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | IO 4 | ==== 5 | 6 | 旨在从不同的媒介读取或写入字节。目前,我们提供了 S3Reader 和 S3Writer 用于兼容 AWS S3 的媒介,以及 HttpReader 和 HttpWriter 用于远程 HTTP 文件。如果 MinerU 没有提供合适的类,你可以实现新的类以满足个人场景的需求。实现新的类非常容易,唯一的要求是继承自 IOReader 或 IOWriter。 7 | 8 | .. 
code:: python 9 | 10 | class SomeReader(IOReader): 11 | def read(self, path: str) -> bytes: 12 | pass 13 | 14 | def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes: 15 | pass 16 | 17 | 18 | class SomeWriter(IOWriter): 19 | def write(self, path: str, data: bytes) -> None: 20 | pass 21 | 22 | -------------------------------------------------------------------------------- /next_docs/zh_cn/user_guide/install.rst: -------------------------------------------------------------------------------- 1 | 2 | 安装 3 | ============== 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | :caption: 安装文档 8 | 9 | install/install 10 | install//boost_with_cuda 11 | install/download_model_weight_files 12 | 13 | 14 | -------------------------------------------------------------------------------- /next_docs/zh_cn/user_guide/quick_start.rst: -------------------------------------------------------------------------------- 1 | 2 | 快速开始 3 | ============== 4 | 5 | 从这里开始学习 MinerU 基本使用方法。若还没有安装,请参考安装文档进行安装 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | :caption: 快速开始 10 | 11 | quick_start/command_line 12 | quick_start/to_markdown 13 | 14 | -------------------------------------------------------------------------------- /next_docs/zh_cn/user_guide/tutorial.rst: -------------------------------------------------------------------------------- 1 | 2 | 教程 3 | =========== 4 | 5 | 让我们通过构建一个最小项目来学习 MinerU 6 | 7 | .. 
toctree:: 8 | :maxdepth: 1 9 | :caption: 教程 10 | 11 | tutorial/output_file_description 12 | tutorial/pipeline 13 | 14 | -------------------------------------------------------------------------------- /projects/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to the MinerU Project List 2 | 3 | ## Project List 4 | 5 | - [llama_index_rag](./llama_index_rag/README.md): Build a lightweight RAG system based on llama_index 6 | - [gradio_app](./gradio_app/README.md): Build a web app based on gradio 7 | - ~~[web_demo](./web_demo/README.md): MinerU online [demo](https://opendatalab.com/OpenSourceTools/Extractor/PDF/) localized deployment version~~(Deprecated) 8 | - [web_api](./web_api/README.md): Web API Based on FastAPI 9 | - [multi_gpu](./multi_gpu/README.md): Multi-GPU parallel processing based on LitServe 10 | -------------------------------------------------------------------------------- /projects/README_zh-CN.md: -------------------------------------------------------------------------------- 1 | # 欢迎来到 MinerU 项目列表 2 | 3 | ## 项目列表 4 | 5 | - [llama_index_rag](./llama_index_rag/README_zh-CN.md): 基于 llama_index 构建轻量级 RAG 系统 6 | - [gradio_app](./gradio_app/README_zh-CN.md): 基于 Gradio 的 Web 应用 7 | - ~~[web_demo](./web_demo/README_zh-CN.md): MinerU在线[demo](https://opendatalab.com/OpenSourceTools/Extractor/PDF/)本地化部署版本~~(已过时) 8 | - [web_api](./web_api/README.md): 基于 FastAPI 的 Web API 9 | - [multi_gpu](./multi_gpu/README.md): 基于 LitServe 的多 GPU 并行处理 10 | -------------------------------------------------------------------------------- /projects/gradio_app/README.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | MinerU(>=0.8.0) 4 | > If you already have a functioning MinerU environment, you can skip this step. 
5 | > 6 | [Deploy in CPU environment](https://github.com/opendatalab/MinerU?tab=readme-ov-file#quick-cpu-demo) 7 | 8 | [Deploy in GPU environment](https://github.com/opendatalab/MinerU?tab=readme-ov-file#using-gpu) 9 | 10 | Third-party Software 11 | 12 | ```bash 13 | pip install gradio gradio-pdf 14 | ``` 15 | 16 | ## Start Gradio App 17 | 18 | ```bash 19 | python app.py 20 | ``` 21 | 22 | ## Use Gradio App 23 | 24 | Access http://127.0.0.1:7860 in your web browser -------------------------------------------------------------------------------- /projects/gradio_app/README_zh-CN.md: -------------------------------------------------------------------------------- 1 | ## 安装 2 | 3 | MinerU(>=0.8.0) 4 | >如已有正常运行的MinerU环境则可以跳过此步骤 5 | > 6 | [在CPU环境部署](https://github.com/opendatalab/MinerU/blob/master/README_zh-CN.md#%E4%BD%BF%E7%94%A8cpu%E5%BF%AB%E9%80%9F%E4%BD%93%E9%AA%8C) 7 | 8 | [在GPU环境部署](https://github.com/opendatalab/MinerU/blob/master/README_zh-CN.md#%E4%BD%BF%E7%94%A8gpu) 9 | 10 | 第三方软件 11 | 12 | ```bash 13 | pip install gradio gradio-pdf 14 | ``` 15 | 16 | ## 启动gradio应用 17 | 18 | ```bash 19 | python app.py 20 | ``` 21 | 22 | ## 使用gradio应用 23 | 24 | 在浏览器中访问 http://127.0.0.1:7860 -------------------------------------------------------------------------------- /projects/gradio_app/examples/2list_1table.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/2list_1table.pdf -------------------------------------------------------------------------------- /projects/gradio_app/examples/3list_1table.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/3list_1table.pdf -------------------------------------------------------------------------------- 
/projects/gradio_app/examples/academic_paper_formula.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/academic_paper_formula.pdf -------------------------------------------------------------------------------- /projects/gradio_app/examples/academic_paper_img_formula.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/academic_paper_img_formula.pdf -------------------------------------------------------------------------------- /projects/gradio_app/examples/academic_paper_list.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/academic_paper_list.pdf -------------------------------------------------------------------------------- /projects/gradio_app/examples/complex_layout.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/complex_layout.pdf -------------------------------------------------------------------------------- /projects/gradio_app/examples/complex_layout_para_split_list.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/complex_layout_para_split_list.pdf -------------------------------------------------------------------------------- /projects/gradio_app/examples/garbled_formula.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/garbled_formula.pdf -------------------------------------------------------------------------------- /projects/gradio_app/examples/magazine_complex_layout_images_list.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/magazine_complex_layout_images_list.pdf -------------------------------------------------------------------------------- /projects/gradio_app/examples/scanned.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/gradio_app/examples/scanned.pdf -------------------------------------------------------------------------------- /projects/gradio_app/requirements.txt: -------------------------------------------------------------------------------- 1 | magic-pdf[full]>=0.8.0 2 | gradio 3 | gradio-pdf -------------------------------------------------------------------------------- /projects/llama_index_rag/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | es: 3 | container_name: es 4 | image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3 5 | volumes: 6 | - esdata01:/usr/share/elasticsearch/data 7 | ports: 8 | - 9200:9200 9 | environment: 10 | - node.name=es 11 | - ELASTIC_PASSWORD=llama_index 12 | - bootstrap.memory_lock=false 13 | - discovery.type=single-node 14 | - xpack.security.enabled=true 15 | - xpack.security.http.ssl.enabled=false 16 | - xpack.security.transport.ssl.enabled=false 17 | ulimits: 18 | memlock: 19 | soft: -1 20 | hard: -1 21 | restart: always 22 | volumes: 23 | esdata01: 24 | driver: local 25 | 
-------------------------------------------------------------------------------- /projects/llama_index_rag/example/data/declaration_of_the_rights_of_man_1789.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/llama_index_rag/example/data/declaration_of_the_rights_of_man_1789.pdf -------------------------------------------------------------------------------- /projects/llama_index_rag/rag_data_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/llama_index_rag/rag_data_api.png -------------------------------------------------------------------------------- /projects/web/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 
25 | -------------------------------------------------------------------------------- /projects/web/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from '@eslint/js' 2 | import globals from 'globals' 3 | import reactHooks from 'eslint-plugin-react-hooks' 4 | import reactRefresh from 'eslint-plugin-react-refresh' 5 | import tseslint from 'typescript-eslint' 6 | 7 | export default tseslint.config( 8 | { ignores: ['dist'] }, 9 | { 10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], 11 | files: ['**/*.{ts,tsx}'], 12 | languageOptions: { 13 | ecmaVersion: 2020, 14 | globals: globals.browser, 15 | }, 16 | plugins: { 17 | 'react-hooks': reactHooks, 18 | 'react-refresh': reactRefresh, 19 | }, 20 | rules: { 21 | ...reactHooks.configs.recommended.rules, 22 | 'react-refresh/only-export-components': [ 23 | 'warn', 24 | { allowConstantExport: true }, 25 | ], 26 | }, 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /projects/web/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | MinerU 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /projects/web/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/build/test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/build/test.js -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/78-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/78-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/78-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/78-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/78-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/78-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/78-RKSJ-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/78-RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/78-RKSJ-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/78-RKSJ-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/78-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/78-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/78ms-RKSJ-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/78ms-RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/78ms-RKSJ-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/78ms-RKSJ-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/83pv-RKSJ-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/83pv-RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/90ms-RKSJ-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/90ms-RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/90ms-RKSJ-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/90ms-RKSJ-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/90msp-RKSJ-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/90msp-RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/90msp-RKSJ-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/90msp-RKSJ-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/90pv-RKSJ-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/90pv-RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/90pv-RKSJ-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/90pv-RKSJ-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Add-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Add-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Add-RKSJ-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Add-RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Add-RKSJ-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Add-RKSJ-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Add-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Add-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-0.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-0.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-1.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-1.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-2.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-2.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-3.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-3.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-4.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-4.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-5.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-5.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-6.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-6.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-UCS2.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-CNS1-UCS2.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-0.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-0.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-1.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-1.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-2.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-2.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-3.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-3.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-4.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-4.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-5.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-5.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-UCS2.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-GB1-UCS2.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-0.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-0.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-1.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-1.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-2.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-2.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-3.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-3.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-4.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-4.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-5.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-5.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-6.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-6.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-UCS2.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Japan1-UCS2.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Korea1-0.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Korea1-0.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Korea1-1.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Korea1-1.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Korea1-2.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Korea1-2.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Adobe-Korea1-UCS2.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Adobe-Korea1-UCS2.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/B5-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/B5pc-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/B5pc-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/B5pc-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/B5pc-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/CNS-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/CNS-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/CNS-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/CNS-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/CNS1-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/CNS1-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/CNS1-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/CNS1-V.bcmap 
-------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/CNS2-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/CNS2-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/CNS2-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/CNS2-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/ETHK-B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/ETHK-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/ETHK-B5-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/ETHK-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/ETen-B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/ETen-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/ETen-B5-V.bcmap: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/ETen-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/ETenms-B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/ETenms-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/ETenms-B5-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/ETenms-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Ext-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Ext-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Ext-RKSJ-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Ext-RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Ext-RKSJ-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Ext-RKSJ-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Ext-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Ext-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GB-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GB-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GB-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GB-EUC-V.bcmap 
-------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GB-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GB-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GB-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GB-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBK-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBK-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBK-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBK-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBK2K-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBK2K-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBK2K-V.bcmap: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBK2K-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBKp-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBKp-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBKp-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBKp-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBT-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBT-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBT-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBT-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBT-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBT-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBT-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBT-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBTpc-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBTpc-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBTpc-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBTpc-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBpc-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBpc-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/GBpc-EUC-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/GBpc-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKdla-B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKdla-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKdla-B5-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKdla-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKdlb-B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKdlb-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKdlb-B5-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKdlb-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKgccs-B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKgccs-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKgccs-B5-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKgccs-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKm314-B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKm314-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKm314-B5-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKm314-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKm471-B5-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKm471-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKm471-B5-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKm471-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKscs-B5-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKscs-B5-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/HKscs-B5-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/HKscs-B5-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Hankaku.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Hankaku.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Hiragana.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Hiragana.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSC-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSC-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSC-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSC-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSC-Johab-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSC-Johab-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSC-Johab-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSC-Johab-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSCms-UHC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSCms-UHC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSCms-UHC-HW-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSCms-UHC-HW-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSCms-UHC-HW-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSCms-UHC-HW-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSCms-UHC-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSCms-UHC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSCpc-EUC-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSCpc-EUC-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/KSCpc-EUC-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/KSCpc-EUC-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Katakana.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Katakana.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/NWP-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/NWP-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/NWP-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/NWP-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/RKSJ-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/RKSJ-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/RKSJ-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/RKSJ-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/Roman.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/Roman.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UCS2-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UCS2-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UCS2-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UCS2-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF16-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF16-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF16-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF16-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF32-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF32-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF32-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF32-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF8-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF8-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF8-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniCNS-UTF8-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniGB-UCS2-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniGB-UCS2-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniGB-UCS2-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniGB-UCS2-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF16-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF16-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF16-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF16-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF32-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF32-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF32-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF32-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF8-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF8-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF8-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniGB-UTF8-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UCS2-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UCS2-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UCS2-HW-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UCS2-HW-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UCS2-HW-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UCS2-HW-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UCS2-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UCS2-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF16-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF16-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF16-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF16-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF32-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF32-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF32-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF32-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF8-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF8-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF8-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS-UTF8-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF16-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF16-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF16-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF16-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF32-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF32-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF32-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF32-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF8-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF8-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF8-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJIS2004-UTF8-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJISPro-UCS2-HW-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJISPro-UCS2-HW-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJISPro-UCS2-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJISPro-UCS2-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJISPro-UTF8-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJISPro-UTF8-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJISX0213-UTF32-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJISX0213-UTF32-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJISX0213-UTF32-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJISX0213-UTF32-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJISX02132004-UTF32-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJISX02132004-UTF32-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniJISX02132004-UTF32-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniJISX02132004-UTF32-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniKS-UCS2-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniKS-UCS2-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniKS-UCS2-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniKS-UCS2-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF16-H.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF16-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF16-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF16-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF32-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF32-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF32-V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF32-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF8-H.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF8-H.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF8-V.bcmap: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/UniKS-UTF8-V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/V.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/V.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/cmaps/WP-Symbol.bcmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/cmaps/WP-Symbol.bcmap -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/custom.css: -------------------------------------------------------------------------------- 1 | .tooltip { 2 | position: absolute; 3 | background-color: #333; 4 | color: white; 5 | padding: 5px 10px; 6 | border-radius: 4px; 7 | font-size: 14px; 8 | z-index: 1000; 9 | pointer-events: none; 10 | } 11 | .tooltip::before { 12 | content: ''; 13 | position: absolute; 14 | top: -5px; 15 | left: 50%; 16 | transform: translateX(-50%); 17 | border-left: 5px solid transparent; 18 | border-right: 5px solid transparent; 19 | border-bottom: 5px solid #333; 20 | } 21 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/custom.js: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/altText_add.svg: -------------------------------------------------------------------------------- 
1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/annotation-check.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 11 | 12 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/annotation-comment.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 13 | 16 | 17 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/annotation-insert.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 10 | 11 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/annotation-newparagraph.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 11 | 12 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/annotation-noicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/annotation-paperclip.svg: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/editor-toolbar-delete.svg: -------------------------------------------------------------------------------- 1 | 2 | 5 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/findbarButton-next.svg: -------------------------------------------------------------------------------- 1 | 
2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/findbarButton-previous.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/gv-toolbarButton-download.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/loading-icon.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/images/loading-icon.gif -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-documentProperties.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-firstPage.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-lastPage.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-rotateCcw.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 
-------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-rotateCw.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-scrollHorizontal.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-scrollPage.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-scrollVertical.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-spreadEven.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-spreadNone.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/secondaryToolbarButton-spreadOdd.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-bookmark.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-editorFreeText.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-editorHighlight.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-editorStamp.svg: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-menuArrow.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-pageDown.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-pageUp.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-presentationMode.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- 
/projects/web/public/pdfjs-dist/web/images/toolbarButton-print.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-sidebarToggle.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-viewAttachments.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-viewLayers.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-viewOutline.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-zoomIn.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/toolbarButton-zoomOut.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/treeitem-collapsed.svg: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/images/treeitem-expanded.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitDingbats.pfb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitDingbats.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitFixed.pfb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitFixed.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitFixedBold.pfb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitFixedBold.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitFixedBoldItalic.pfb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitFixedBoldItalic.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitFixedItalic.pfb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitFixedItalic.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSerif.pfb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSerif.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSerifBold.pfb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSerifBold.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSerifBoldItalic.pfb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSerifBoldItalic.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSerifItalic.pfb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSerifItalic.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSymbol.pfb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/FoxitSymbol.pfb -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/LiberationSans-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/LiberationSans-Bold.ttf -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/LiberationSans-BoldItalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/LiberationSans-BoldItalic.ttf -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/LiberationSans-Italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/LiberationSans-Italic.ttf -------------------------------------------------------------------------------- /projects/web/public/pdfjs-dist/web/standard_fonts/LiberationSans-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/public/pdfjs-dist/web/standard_fonts/LiberationSans-Regular.ttf -------------------------------------------------------------------------------- /projects/web/src/App.css: 
-------------------------------------------------------------------------------- 1 | body,html, #root { 2 | width: 100%; 3 | height: 100vh; 4 | background: white; 5 | } 6 | 7 | .logo { 8 | height: 6em; 9 | padding: 1.5em; 10 | will-change: filter; 11 | transition: filter 300ms; 12 | } 13 | .logo:hover { 14 | filter: drop-shadow(0 0 2em #646cffaa); 15 | } 16 | .logo.react:hover { 17 | filter: drop-shadow(0 0 2em #61dafbaa); 18 | } 19 | 20 | @keyframes logo-spin { 21 | from { 22 | transform: rotate(0deg); 23 | } 24 | to { 25 | transform: rotate(360deg); 26 | } 27 | } 28 | 29 | @media (prefers-reduced-motion: no-preference) { 30 | a:nth-of-type(2) .logo { 31 | animation: logo-spin infinite 20s linear; 32 | } 33 | } 34 | 35 | .card { 36 | padding: 2em; 37 | } 38 | 39 | .read-the-docs { 40 | color: #888; 41 | } 42 | -------------------------------------------------------------------------------- /projects/web/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { Home } from "./pages/home"; 2 | import "./App.css"; 3 | import QueryProvider from "./context/query-provider"; 4 | 5 | function App() { 6 | return ( 7 | 8 | 9 | 10 | ); 11 | } 12 | 13 | export default App; 14 | -------------------------------------------------------------------------------- /projects/web/src/assets/imgs/online.experience/UploadingOutlined.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /projects/web/src/assets/pdf/comingSoonLayer.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /projects/web/src/assets/pdf/exitFullScreen.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 
8 | -------------------------------------------------------------------------------- /projects/web/src/assets/pdf/extractor-queue.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /projects/web/src/assets/pdf/fullScreen.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /projects/web/src/assets/pdf/pdf-upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/src/assets/pdf/pdf-upload.png -------------------------------------------------------------------------------- /projects/web/src/components/code-mirror/index.module.scss: -------------------------------------------------------------------------------- 1 | .code-mirror { 2 | :global { 3 | .ͼ1 .cm-scroller { 4 | overflow-x: visible !important; 5 | border: none !important; 6 | } 7 | .cm-editor.cm-focused { 8 | outline: none !important; 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /projects/web/src/components/icon-font.tsx: -------------------------------------------------------------------------------- 1 | import { createFromIconfontCN } from "@ant-design/icons"; 2 | 3 | const IconFont = createFromIconfontCN({ 4 | scriptUrl: `/iconfont.js`, 5 | }); 6 | 7 | export default IconFont; 8 | -------------------------------------------------------------------------------- /projects/web/src/components/loading-animation/index.tsx: -------------------------------------------------------------------------------- 1 | import styles from './loadingAnimation.module.scss'; 2 | 3 | interface ILoadingAnimationProps { 4 | className?: 
string; 5 | } 6 | 7 | const LoadingAnimation = (props: ILoadingAnimationProps) => { 8 | const { className } = props; 9 | return
; 10 | }; 11 | 12 | export default LoadingAnimation; 13 | -------------------------------------------------------------------------------- /projects/web/src/components/loading-animation/loadingAnimation.module.scss: -------------------------------------------------------------------------------- 1 | .loader { 2 | width: 14px; 3 | height: 14px; 4 | border-radius: 50%; 5 | display: inline-block; 6 | position: relative; 7 | /* stylelint-disable-next-line alpha-value-notation */ 8 | background: linear-gradient(0deg, rgba(13,83,222,1) 0%, rgba(43,105,226,1) 30%, rgba(13, 20, 222, 0) 100%); 9 | box-sizing: border-box; 10 | animation: rotation 1.5s linear infinite; 11 | } 12 | 13 | .loader::after { 14 | content: ''; 15 | box-sizing: border-box; 16 | position: absolute; 17 | left: 50%; 18 | top: 50%; 19 | transform: translate(-50%, -50%); 20 | width: 10px; 21 | height: 10px; 22 | border-radius: 50%; 23 | background: #fff; 24 | } 25 | 26 | @keyframes rotation { 27 | 0% { transform: rotate(0deg) } 28 | 100% { transform: rotate(360deg)} 29 | } 30 | -------------------------------------------------------------------------------- /projects/web/src/components/text-tooltip/index.module.scss: -------------------------------------------------------------------------------- 1 | .textTooltip { 2 | :global { 3 | .ant-tooltip-arrow { 4 | // display: none !important; 5 | } 6 | .ant-tooltip-inner, .ant-tooltip-content, .ant-tooltip-inner-content { 7 | padding: 0px !important; 8 | border-radius: 4px !important; 9 | overflow: hidden; 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /projects/web/src/constant/event.ts: -------------------------------------------------------------------------------- 1 | export const PDF_DRIVE_MD = "pdf-drive-md"; 2 | export const MD_DRIVE_PDF = "pdf-drive-md"; 3 | 4 | export const ADD_TASK_LIST = "add-task-list"; 5 | 6 | export const UPDATE_TASK_LIST = "update-task-list"; 7 | 
-------------------------------------------------------------------------------- /projects/web/src/constant/index.tsx: -------------------------------------------------------------------------------- 1 | export enum Language { 2 | ZH_CN = "zh-CN", 3 | EN_US = "en-US", 4 | } 5 | -------------------------------------------------------------------------------- /projects/web/src/constant/route.ts: -------------------------------------------------------------------------------- 1 | export enum Path { 2 | Home = "/", 3 | Settings = "/settings", 4 | } 5 | 6 | export enum SlotID { 7 | AppBody = "app-body", 8 | } 9 | -------------------------------------------------------------------------------- /projects/web/src/constant/storage.ts: -------------------------------------------------------------------------------- 1 | export const LOCALE_STORAGE_KEY = "locale-minerU" -------------------------------------------------------------------------------- /projects/web/src/context/query-provider.tsx: -------------------------------------------------------------------------------- 1 | // QueryProvider.tsx 2 | import React from "react"; 3 | import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; 4 | 5 | const defaultQueryClientConfig = { 6 | defaultOptions: { 7 | queries: { 8 | retry: false, 9 | refetchOnWindowFocus: false, 10 | staleTime: 5 * 60 * 1000, // 5 minutes 11 | }, 12 | }, 13 | }; 14 | 15 | interface QueryProviderProps { 16 | children: React.ReactNode; 17 | } 18 | 19 | const QueryProvider: React.FC = ({ children }) => { 20 | const queryClient = new QueryClient({ 21 | ...defaultQueryClientConfig, 22 | }); 23 | 24 | return ( 25 | {children} 26 | ); 27 | }; 28 | 29 | export default QueryProvider; 30 | -------------------------------------------------------------------------------- /projects/web/src/locale/common/en.json: -------------------------------------------------------------------------------- 1 | { 2 | "common.cancel": "Cancel", 3 | "common.confirm": 
"Confirm", 4 | "common.retry": "retry" 5 | } -------------------------------------------------------------------------------- /projects/web/src/locale/common/zh.json: -------------------------------------------------------------------------------- 1 | { 2 | "common.cancel": "取消", 3 | "common.confirm": "确定", 4 | "common.retry": "retry" 5 | } -------------------------------------------------------------------------------- /projects/web/src/main.tsx: -------------------------------------------------------------------------------- 1 | import { StrictMode } from 'react' 2 | import { createRoot } from 'react-dom/client' 3 | import App from './App.tsx' 4 | import './index.css' 5 | 6 | createRoot(document.getElementById('root')!).render( 7 | 8 | 9 | , 10 | ) 11 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/components/extractor-guide/index.module.scss: -------------------------------------------------------------------------------- 1 | .extractorGuide { 2 | :global { 3 | 4 | .ant-popover-content, .ant-popover-inner { 5 | border-radius: 12px !important; 6 | overflow: hidden; 7 | box-shadow: 0px 8px 26px 0px rgba(0, 0, 0, 0.12); 8 | } 9 | 10 | .ant-popover-inner-content { 11 | padding: 24px !important; 12 | } 13 | 14 | .ant-popover-arrow { 15 | display: none !important; 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/components/extractor-lang/index.tsx: -------------------------------------------------------------------------------- 1 | import LangChangeIcon from "@/assets/pdf/lang-change.svg"; 2 | import { useLanguageStore } from "@/store/languageStore"; 3 | import cls from "classnames"; 4 | 5 | interface ExtractorLangProps { 6 | className?: string; 7 | } 8 | 9 | const ExtractorLang: React.FC = ({ className }) => { 10 | const { toggleLanguage } = useLanguageStore(); 11 | const changeLang = () => { 12 | 
toggleLanguage?.(); 13 | }; 14 | return ( 15 | <> 16 | changeLang()} 18 | src={LangChangeIcon} 19 | alt="LangChangeIcon" 20 | className={cls( 21 | "w-[1.5rem] h-[1.5rem] cursor-pointer object-cover hover:bg-[#0D53DE]/[0.1] rounded cursor-pointer", 22 | className 23 | )} 24 | /> 25 | 26 | ); 27 | }; 28 | 29 | export default ExtractorLang; 30 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/components/extractor-repo/index.tsx: -------------------------------------------------------------------------------- 1 | import githubSvg from "@/assets/pdf/github.svg"; 2 | import { windowOpen } from "@/utils/windowOpen"; 3 | import styles from "./index.module.scss"; 4 | import cls from "classnames"; 5 | 6 | const ExtractorRepo = () => { 7 | return ( 8 |
11 | windowOpen("https://github.com/opendatalab/MinerU", "_blank") 12 | } 13 | > 14 | 15 | 16 | 🎉 17 | 18 |
19 | ); 20 | }; 21 | 22 | export default ExtractorRepo; 23 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/components/latex-renderer/index.module.scss: -------------------------------------------------------------------------------- 1 | // @import '../../../../global.scss'; 2 | 3 | .customStyle { 4 | padding: 2rem; 5 | padding-top: 0rem; 6 | & > div { 7 | max-width: 100%; 8 | max-height: 100%; 9 | // @include scrollBar(red); 10 | } 11 | .katex-display { 12 | margin-top: 0px !important; 13 | // @include scrollBar(red); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/components/latex-renderer/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import 'katex/dist/katex.min.css'; 3 | import { BlockMath } from 'react-katex'; 4 | import style from './index.module.scss'; 5 | import classNames from 'classnames'; 6 | 7 | interface LatexRendererProps { 8 | formula: string; 9 | className?: string; 10 | 'aria-label'?: string; 11 | title?: string; 12 | } 13 | 14 | function LatexRenderer({ formula, className = '', 'aria-label': ariaLabel, title }: LatexRendererProps) { 15 | try { 16 | return ( 17 |
21 | 22 |
23 | ); 24 | } catch (error) { 25 | console.error('Error rendering Latex:', error); 26 | return
Unable to render Latex formula.
; 27 | } 28 | } 29 | 30 | export default LatexRenderer; 31 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/components/loading-icon/index.tsx: -------------------------------------------------------------------------------- 1 | import classNames from "classnames"; 2 | import style from "./index.module.scss"; 3 | 4 | const LoadingIcon = ({ 5 | color, 6 | className, 7 | }: { 8 | color: string; 9 | className?: string; 10 | }) => { 11 | return ( 12 |
13 |
17 |
18 | ); 19 | }; 20 | 21 | export default LoadingIcon; 22 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/formula/formula-popover/index.module.scss: -------------------------------------------------------------------------------- 1 | .formulaPopover { 2 | :global { 3 | 4 | .ant-popover-content, .ant-popover-inner { 5 | border-radius: 12px !important; 6 | overflow: hidden; 7 | box-shadow: 0px 8px 26px 0px rgba(0, 0, 0, 0.12); 8 | } 9 | 10 | .ant-popover-inner-content { 11 | padding: 24px !important; 12 | } 13 | 14 | .ant-popover-arrow { 15 | display: none !important; 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/formula/index.module.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/src/pages/extract/formula/index.module.scss -------------------------------------------------------------------------------- /projects/web/src/pages/extract/formula/index.tsx: -------------------------------------------------------------------------------- 1 | import { Outlet } from "react-router-dom"; 2 | 3 | const Formula = () => { 4 | return ( 5 |
6 | 7 |
8 | ); 9 | }; 10 | 11 | export default Formula; 12 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/index.module.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/src/pages/extract/index.module.scss -------------------------------------------------------------------------------- /projects/web/src/pages/extract/index.tsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/src/pages/extract/index.tsx -------------------------------------------------------------------------------- /projects/web/src/pages/extract/table/index.tsx: -------------------------------------------------------------------------------- 1 | const ExtractorTable = () => { 2 | return <>ExtractorTable; 3 | }; 4 | 5 | export default ExtractorTable; 6 | -------------------------------------------------------------------------------- /projects/web/src/pages/extract/table/table-detail/index.tsx: -------------------------------------------------------------------------------- 1 | const TableDetail = () => { 2 | return <>TableDetail; 3 | }; 4 | 5 | export default TableDetail; 6 | -------------------------------------------------------------------------------- /projects/web/src/pages/home.module.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web/src/pages/home.module.scss -------------------------------------------------------------------------------- /projects/web/src/routes/index.tsx: -------------------------------------------------------------------------------- 1 | import { Routes, Route } from "react-router-dom"; 2 | 
import PDFUpload from "@/pages/extract/components/pdf-upload"; 3 | import PDFExtractionJob from "@/pages/extract/components/pdf-extraction"; 4 | 5 | function AppRoutes() { 6 | return ( 7 | <> 8 | } /> 9 | } 12 | /> 13 | 14 | ); 15 | } 16 | 17 | export default AppRoutes; 18 | -------------------------------------------------------------------------------- /projects/web/src/styles/variable.scss: -------------------------------------------------------------------------------- 1 | $page-min-witch: 1260px; -------------------------------------------------------------------------------- /projects/web/src/types/extract-task-type.ts: -------------------------------------------------------------------------------- 1 | export type ExtractTaskType = 2 | | "pdf" 3 | | "formula-detect" 4 | | "formula-extract" 5 | | "table-recogn"; 6 | 7 | export const EXTRACTOR_TYPE_LIST = { 8 | table: "table", 9 | formula: "formula", 10 | pdf: "PDF", 11 | }; 12 | 13 | export enum FORMULA_TYPE { 14 | extract = "extract", 15 | detect = "detect", 16 | } 17 | 18 | export enum MD_PREVIEW_TYPE { 19 | preview = "preview", 20 | code = "code", 21 | } 22 | -------------------------------------------------------------------------------- /projects/web/src/utils/locale.ts: -------------------------------------------------------------------------------- 1 | export enum ELocal { 2 | "zh-CN" = "zh-CN", 3 | "en-US" = "en-US", 4 | } 5 | export const locale: { [key: string]: string } = { 6 | [ELocal["zh-CN"]]: "中文", 7 | [ELocal["en-US"]]: "En", 8 | }; 9 | export const localeName: { [key: string]: string } = { 10 | [ELocal["zh-CN"]]: "nameZh", 11 | [ELocal["en-US"]]: "name", 12 | }; 13 | 14 | export const getLocale = () => { 15 | return localStorage.getItem("umi_locale") || ELocal["zh-CN"]; 16 | }; 17 | -------------------------------------------------------------------------------- /projects/web/src/utils/windowOpen.ts: -------------------------------------------------------------------------------- 1 | export 
const windowOpen = ( 2 | url: string, 3 | type?: "_blank" | "_parent" | "_self" | "_top" 4 | ) => { 5 | const a = document.createElement("a"); 6 | a.setAttribute("href", url); 7 | a.setAttribute("target", type || "_blank"); 8 | a.rel = "noreferrer"; 9 | document.body.appendChild(a); 10 | if (a.click) { 11 | a?.click(); 12 | } else { 13 | try { 14 | let evt = new Event("click", { 15 | bubbles: false, 16 | cancelable: true, 17 | }); 18 | a.dispatchEvent(evt); 19 | } catch (error) { 20 | window.open(url, type || "_blank"); 21 | } 22 | } 23 | document.body.removeChild(a); 24 | }; 25 | -------------------------------------------------------------------------------- /projects/web/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /projects/web/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "useDefineForClassFields": true, 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "module": "ESNext", 7 | "skipLibCheck": true, 8 | 9 | /* Bundler mode */ 10 | "moduleResolution": "bundler", 11 | "allowImportingTsExtensions": true, 12 | "isolatedModules": true, 13 | "moduleDetection": "force", 14 | "noEmit": true, 15 | "jsx": "react-jsx", 16 | 17 | /* Linting */ 18 | "strict": true, 19 | "noUnusedLocals": true, 20 | "noUnusedParameters": true, 21 | "noFallthroughCasesInSwitch": true, 22 | "paths": { 23 | "@/*": ["./src/*"] 24 | } 25 | }, 26 | "include": ["src"] 27 | } 28 | -------------------------------------------------------------------------------- /projects/web/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "compilerOptions": { 4 | // ... other options ... 
5 | "types": ["node"] 6 | }, 7 | "references": [ 8 | { "path": "./tsconfig.app.json" }, 9 | { "path": "./tsconfig.node.json" } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /projects/web/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "lib": ["ES2023"], 5 | "module": "ESNext", 6 | "skipLibCheck": true, 7 | 8 | /* Bundler mode */ 9 | "moduleResolution": "bundler", 10 | "allowImportingTsExtensions": true, 11 | "isolatedModules": true, 12 | "moduleDetection": "force", 13 | "noEmit": true, 14 | 15 | /* Linting */ 16 | "strict": true, 17 | "noUnusedLocals": true, 18 | "noUnusedParameters": true, 19 | "noFallthroughCasesInSwitch": true 20 | }, 21 | "include": ["vite.config.ts"] 22 | } 23 | -------------------------------------------------------------------------------- /projects/web/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from "vite"; 2 | import react from "@vitejs/plugin-react"; 3 | import path from "path"; 4 | 5 | // https://vitejs.dev/config/ 6 | export default defineConfig({ 7 | plugins: [react()], 8 | server: { 9 | proxy: { 10 | "/api": { 11 | target: "http://localhost:5559", 12 | changeOrigin: true, 13 | }, 14 | }, 15 | }, 16 | css: { 17 | modules: { 18 | localsConvention: "camelCaseOnly", // transfer kebab-case to camelCase 19 | scopeBehaviour: "local", 20 | generateScopedName: "[name]__[local]___[hash:base64:5]", 21 | }, 22 | }, 23 | publicDir: "public", 24 | resolve: { 25 | alias: { 26 | "@": path.resolve(__dirname, "./src"), 27 | }, 28 | }, 29 | }); 30 | -------------------------------------------------------------------------------- /projects/web_api/README.md: -------------------------------------------------------------------------------- 1 | # 基于MinerU的PDF解析API 2 | 3 | - MinerU的GPU镜像构建 4 | - 基于FastAPI的PDF解析接口 5 
| 6 | ## 构建方式 7 | 8 | ``` 9 | docker build -t mineru-api . 10 | ``` 11 | 12 | 或者使用代理: 13 | 14 | ``` 15 | docker build --build-arg http_proxy=http://127.0.0.1:7890 --build-arg https_proxy=http://127.0.0.1:7890 -t mineru-api . 16 | ``` 17 | 18 | ## 启动命令 19 | 20 | ``` 21 | docker run --rm -it --gpus=all -p 8000:8000 mineru-api 22 | ``` 23 | 24 | ## 测试参数 25 | 26 | 访问地址: 27 | 28 | ``` 29 | http://localhost:8000/docs 30 | http://127.0.0.1:8000/docs 31 | ``` -------------------------------------------------------------------------------- /projects/web_api/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | . /app/venv/bin/activate 5 | exec uvicorn app:app "$@" 6 | -------------------------------------------------------------------------------- /projects/web_api/requirements.txt: -------------------------------------------------------------------------------- 1 | magic-pdf[full] 2 | 3 | fastapi 4 | uvicorn 5 | python-multipart 6 | -------------------------------------------------------------------------------- /projects/web_demo/images/web_demo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/images/web_demo_1.png -------------------------------------------------------------------------------- /projects/web_demo/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "web-api" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["houlinfeng "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.10" 10 | flask = "^3.0.3" 11 | flask-restful = "^0.3.10" 12 | flask-cors = "^5.0.0" 13 | flask-sqlalchemy = "^3.1.1" 14 | flask-migrate = "^4.0.7" 15 | flask-jwt-extended = "^4.6.0" 16 | flask-marshmallow = "^1.2.1" 17 | pyyaml = "^6.0.2" 
18 | loguru = "^0.7.2" 19 | marshmallow-sqlalchemy = "^1.1.0" 20 | 21 | 22 | [build-system] 23 | requires = ["poetry-core"] 24 | build-backend = "poetry.core.masonry.api" 25 | -------------------------------------------------------------------------------- /projects/web_demo/requirements.txt: -------------------------------------------------------------------------------- 1 | flask-cors 2 | flask-jwt-extended 3 | flask-marshmallow 4 | flask-migrate 5 | flask-restful 6 | flask-sqlalchemy 7 | flask 8 | greenlet 9 | loguru 10 | marshmallow-sqlalchemy 11 | marshmallow 12 | pyjwt 13 | pyyaml 14 | -------------------------------------------------------------------------------- /projects/web_demo/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/tests/__init__.py -------------------------------------------------------------------------------- /projects/web_demo/web_demo/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["common", "api"] -------------------------------------------------------------------------------- /projects/web_demo/web_demo/api/analysis/ext.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | task_state_map = { 4 | 0: "running", 5 | 1: "done", 6 | 2: "pending" 7 | } 8 | 9 | 10 | def find_file(file_key, file_dir): 11 | """ 12 | Look up a file 13 | :param file_key: file hash, matched as a file-name prefix 14 | :param file_dir: directory tree to search (walked recursively) 15 | :return: path of the first matching file, or "" when nothing matches 16 | """ 17 | pdf_path = "" 18 | for root, subDirs, files in os.walk(file_dir): 19 | for fileName in files: 20 | if fileName.startswith(file_key): 21 | pdf_path = os.path.join(root, fileName) 22 | break 23 | if pdf_path: 24 | break 25 | return pdf_path 26 | --------------------------------------------------------------------------------
/projects/web_demo/web_demo/api/react_app/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from flask import Blueprint 3 | from ..extentions import app, Api 4 | from .react_app_view import ReactAppView 5 | from loguru import logger 6 | 7 | folder = Path(app.config.get("REACT_APP_DIST", "../../web/dist/")).resolve() 8 | logger.info(f"react_app folder: {folder}") 9 | react_app_blue = Blueprint('react_app', __name__, static_folder=folder, static_url_path='', template_folder=folder) 10 | react_app_api = Api(react_app_blue, prefix='') 11 | react_app_api.add_resource(ReactAppView, '/') -------------------------------------------------------------------------------- /projects/web_demo/web_demo/api/react_app/react_app_view.py: -------------------------------------------------------------------------------- 1 | from flask import render_template, Response 2 | from flask_restful import Resource 3 | 4 | 5 | class ReactAppView(Resource): 6 | def get(self): 7 | # Render index.html and wrap it in a custom text/html response object 8 | rendered_template = render_template('index.html') 9 | response = Response(rendered_template, mimetype='text/html') 10 | 11 | return response 12 | -------------------------------------------------------------------------------- /projects/web_demo/web_demo/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/web_demo/common/__init__.py -------------------------------------------------------------------------------- /projects/web_demo/web_demo/common/custom_response.py: -------------------------------------------------------------------------------- 1 | from flask import jsonify 2 | 3 | 4 | class ResponseCode: 5 | SUCCESS = 200 6 | PARAM_WARING = 400 7 | MESSAGE = "success" 8 | 9 | 10 | def generate_response(data=None, code=ResponseCode.SUCCESS, msg=ResponseCode.MESSAGE, 
**kwargs): 11 | """ 12 | Build the custom JSON response 13 | :param code: status code placed in the JSON body; 200 means success 14 | :param data: payload returned under the "data" key 15 | :param msg: message text; when empty falls back to 'success' (code 200) or 'fail' 16 | :param kwargs: extra fields merged into the JSON body 17 | :return: the jsonify() result with its status_code set to 200 18 | """ 19 | msg = msg or 'success' if code == 200 else msg or 'fail' 20 | success = True if code == 200 else False 21 | res = jsonify(dict(code=code, success=success, data=data, msg=msg, **kwargs)) 22 | res.status_code = 200 23 | return res 24 | -------------------------------------------------------------------------------- /projects/web_demo/web_demo/common/import_models.py: -------------------------------------------------------------------------------- 1 | from api.analysis.models import * -------------------------------------------------------------------------------- /projects/web_demo/web_demo/common/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from loguru import logger 3 | from pathlib import Path 4 | from datetime import datetime 5 | 6 | 7 | def setup_log(config): 8 | """ 9 | Add a dated log file under the "log" directory two levels above this file 10 | :param config: mapping read with config.get("LOG_LEVEL") 11 | :return: None 12 | """ 13 | log_path = os.path.join(Path(__file__).parent.parent, "log") 14 | if not Path(log_path).exists(): 15 | Path(log_path).mkdir(parents=True, exist_ok=True) 16 | log_level = config.get("LOG_LEVEL") 17 | log_name = f'log_{datetime.now().strftime("%Y-%m-%d")}.log' 18 | log_file_path = os.path.join(log_path, log_name) 19 | logger.add(str(log_file_path), rotation='00:00', encoding='utf-8', level=log_level, enqueue=True) 20 | -------------------------------------------------------------------------------- /projects/web_demo/web_demo/common/mk_markdown/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/web_demo/common/mk_markdown/__init__.py -------------------------------------------------------------------------------- 
/projects/web_demo/web_demo/common/mk_markdown/libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/web_demo/common/mk_markdown/libs/__init__.py -------------------------------------------------------------------------------- /projects/web_demo/web_demo/common/mk_markdown/resources/fasttext-langdetect/lid.176.ftz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/web_demo/common/mk_markdown/resources/fasttext-langdetect/lid.176.ftz -------------------------------------------------------------------------------- /projects/web_demo/web_demo/common/web_hook.py: -------------------------------------------------------------------------------- 1 | 2 | def before_request(): 3 | return None 4 | 5 | 6 | def after_request(response): 7 | response.headers.add('Access-Control-Allow-Origin', '*') 8 | response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization') 9 | return response 10 | -------------------------------------------------------------------------------- /projects/web_demo/web_demo/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/web_demo/config/__init__.py -------------------------------------------------------------------------------- /projects/web_demo/web_demo/config/config.yaml: -------------------------------------------------------------------------------- 1 | # 基本配置 2 | BaseConfig: &base 3 | DEBUG: false 4 | PORT: 5559 5 | LOG_LEVEL: "DEBUG" 6 | SQLALCHEMY_TRACK_MODIFICATIONS: true 7 | SQLALCHEMY_DATABASE_URI: "" 8 | PROPAGATE_EXCEPTIONS: true 9 | SECRET_KEY: "#$%^&**$##*(*^%%$**((&" 
10 | JWT_SECRET_KEY: "#$%^&**$##*(*^%%$**((&" 11 | JWT_ACCESS_TOKEN_EXPIRES: 3600 12 | PDF_UPLOAD_FOLDER: "upload_pdf" 13 | PDF_ANALYSIS_FOLDER: "analysis_pdf" 14 | # 前端项目打包的路径 15 | REACT_APP_DIST: "../../web/dist/" 16 | # 文件访问路径 17 | FILE_API: "/api/v2/analysis/pdf_img?as_attachment=False" 18 | 19 | # 开发配置 20 | DevelopmentConfig: 21 | <<: *base 22 | database: 23 | type: sqlite 24 | path: config/mineru_web.db 25 | 26 | # 生产配置 27 | ProductionConfig: 28 | <<: *base 29 | 30 | # 测试配置 31 | TestingConfig: 32 | <<: *base 33 | 34 | # 当前使用配置 35 | CurrentConfig: "DevelopmentConfig" 36 | -------------------------------------------------------------------------------- /projects/web_demo/web_demo/config/mineru_web.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/web_demo/config/mineru_web.db -------------------------------------------------------------------------------- /projects/web_demo/web_demo/static/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/projects/web_demo/web_demo/static/__init__.py -------------------------------------------------------------------------------- /requirements-qa.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | Levenshtein 3 | nltk 4 | rapidfuzz 5 | statistics 6 | openxlab #安装opendatalab 7 | pandas 8 | numpy 9 | matplotlib 10 | seaborn 11 | scipy 12 | scikit-learn 13 | tqdm 14 | htmltabletomd 15 | pypandoc 16 | pyopenssl==24.0.0 17 | struct-eqtable==0.1.0 18 | pytest-cov 19 | beautifulsoup4 20 | coverage -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.28.43 2 | 
Brotli>=1.1.0 3 | click>=8.1.7 4 | fast-langdetect>=0.2.3,<0.3.0 5 | loguru>=0.6.0 6 | numpy>=1.21.6 7 | pydantic>=2.7.2,<2.11 8 | PyMuPDF>=1.24.9,<1.25.0 9 | scikit-learn>=1.0.2 10 | torch>=2.2.2,!=2.5.0,!=2.5.1,<3 11 | torchvision 12 | transformers>=4.49.0,!=4.51.0,<5.0.0 13 | pdfminer.six==20250506 14 | tqdm>=4.67.1 15 | # The requirements.txt must ensure that only necessary external dependencies are introduced. If there are new dependencies to add, please contact the project administrator. 16 | -------------------------------------------------------------------------------- /tests/clean_coverage.py: -------------------------------------------------------------------------------- 1 | """ 2 | clean coverage 3 | """ 4 | import os 5 | import shutil 6 | 7 | def delete_file(path): 8 | """delete file.""" 9 | if not os.path.exists(path): 10 | if os.path.isfile(path): 11 | try: 12 | os.remove(path) 13 | print(f"File '{path}' deleted.") 14 | except TypeError as e: 15 | print(f"Error deleting file '{path}': {e}") 16 | elif os.path.isdir(path): 17 | try: 18 | shutil.rmtree(path) 19 | print(f"Directory '{path}' and its contents deleted.") 20 | except TypeError as e: 21 | print(f"Error deleting directory '{path}': {e}") 22 | 23 | if __name__ == "__main__": 24 | delete_file("htmlcov/") 25 | #delete_file(".coverage") 26 | -------------------------------------------------------------------------------- /tests/get_coverage.py: -------------------------------------------------------------------------------- 1 | """ 2 | get cov 3 | """ 4 | from bs4 import BeautifulSoup 5 | import shutil 6 | def get_covrage(): 7 | """get covrage""" 8 | # 发送请求获取网页内容 9 | html_content = open("htmlcov/index.html", "r", encoding="utf-8").read() 10 | soup = BeautifulSoup(html_content, 'html.parser') 11 | 12 | # 查找包含"pc_cov"的span标签 13 | pc_cov_span = soup.find('span', class_='pc_cov') 14 | 15 | # 提取百分比值 16 | percentage_value = pc_cov_span.text.strip() 17 | percentage_float = 
float(percentage_value.rstrip('%')) 18 | print ("percentage_float:", percentage_float) 19 | assert percentage_float >= 0.2 20 | 21 | if __name__ == '__main__': 22 | get_covrage() -------------------------------------------------------------------------------- /tests/test_cli/conf/__init__py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/test_cli/conf/__init__py -------------------------------------------------------------------------------- /tests/test_cli/conf/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | conf = { 3 | "code_path": os.environ.get('GITHUB_WORKSPACE'), 4 | "pdf_dev_path" : os.environ.get('GITHUB_WORKSPACE') + "/tests/test_cli/pdf_dev", 5 | #"code_path": "/home/quyuan/ci/actions-runner/MinerU", 6 | #"pdf_dev_path": "/home/quyuan/ci/actions-runner/MinerU/tests/test_cli/pdf_dev", 7 | "pdf_res_path": "/tmp/magic-pdf", 8 | "jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl", 9 | "s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test_rearch_report.pdf" 10 | } 11 | -------------------------------------------------------------------------------- /tests/test_cli/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | def clear_gpu_memory(): 5 | ''' 6 | clear GPU memory 7 | ''' 8 | torch.cuda.empty_cache() 9 | print("GPU memory cleared.") 10 | 11 | -------------------------------------------------------------------------------- /tests/test_cli/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/test_cli/lib/__init__.py -------------------------------------------------------------------------------- /tests/test_cli/magic-pdf.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "bucket_info":{ 3 | "bucket-name-1":["ak", "sk", "endpoint"], 4 | "bucket-name-2":["ak", "sk", "endpoint"] 5 | }, 6 | "temp-output-dir":"/tmp", 7 | "models-dir":"/tmp/models", 8 | "device-mode":"cpu" 9 | } -------------------------------------------------------------------------------- /tests/test_cli/pdf_dev/doc/test_mineru.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/test_cli/pdf_dev/doc/test_mineru.docx -------------------------------------------------------------------------------- /tests/test_cli/pdf_dev/images/docstructbench.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/test_cli/pdf_dev/images/docstructbench.jpg -------------------------------------------------------------------------------- /tests/test_cli/pdf_dev/pdf/test_rearch_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/test_cli/pdf_dev/pdf/test_rearch_report.pdf -------------------------------------------------------------------------------- /tests/test_cli/pdf_dev/ppt/small.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/test_cli/pdf_dev/ppt/small.pptx -------------------------------------------------------------------------------- /tests/test_cli/pdf_dev/result.json: -------------------------------------------------------------------------------- 1 | {"average_sim_score":0.6505598645664856, "average_edit_distance":0.2514908429188901, 
"average_bleu_score": 0.5808819533975296} -------------------------------------------------------------------------------- /tests/unittest/test_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_data/__init__.py -------------------------------------------------------------------------------- /tests/unittest/test_data/assets/jsonl/test_01.jsonl: -------------------------------------------------------------------------------- 1 | {"track_id":"e8824f5a-9fcb-4ee5-b2d4-6bf2c67019dc","path":"s3://sci-hub/enbook-scimag/78800000/libgen.scimag78872000-78872999/10.1017/cbo9780511770425.012.pdf","file_type":"pdf","content_type":"application/pdf","content_length":80078,"title":"German Idealism and the Concept of Punishment || Conclusion","remark":{"file_id":"scihub_78800000/libgen.scimag78872000-78872999.zip_10.1017/cbo9780511770425.012","file_source_type":"paper","original_file_id":"10.1017/cbo9780511770425.012","file_name":"10.1017/cbo9780511770425.012.pdf","author":"Merle, Jean-Christophe"}} 2 | -------------------------------------------------------------------------------- /tests/unittest/test_data/assets/jsonl/test_02.jsonl: -------------------------------------------------------------------------------- 1 | {"track_id":"e8824f5a-9fcb-4ee5-b2d4-6bf2c67019dc","path":"tests/unittest/test_data/assets/pdfs/test_02.pdf","file_type":"pdf","content_type":"application/pdf","content_length":80078,"title":"German Idealism and the Concept of Punishment || Conclusion","remark":{"file_id":"scihub_78800000/libgen.scimag78872000-78872999.zip_10.1017/cbo9780511770425.012","file_source_type":"paper","original_file_id":"10.1017/cbo9780511770425.012","file_name":"10.1017/cbo9780511770425.012.pdf","author":"Merle, Jean-Christophe"}} 2 | -------------------------------------------------------------------------------- 
# /tests/unittest/test_data/assets/pdfs/test_01.pdf:
# https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_data/assets/pdfs/test_01.pdf
# /tests/unittest/test_data/assets/pdfs/test_02.pdf:
# https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_data/assets/pdfs/test_02.pdf
# /tests/unittest/test_data/assets/pngs/test_01.png:
# https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_data/assets/pngs/test_01.png
# /tests/unittest/test_data/assets/pngs/test_02.png:
# https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_data/assets/pngs/test_02.png
# /tests/unittest/test_data/data_reader_writer/__init__.py:
# https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_data/data_reader_writer/__init__.py
# /tests/unittest/test_data/data_reader_writer/test_filebase.py:
import os
import shutil

from magic_pdf.data.data_reader_writer import (FileBasedDataReader,
                                               FileBasedDataWriter)


def test_filebased_reader_writer():
    """Round-trip bytes through FileBasedDataWriter and FileBasedDataReader.

    Both objects are constructed with ``sub_dir``. The payload is written
    and read back twice: once under the bare name ``'test.txt'`` and once
    under an absolute path that lives in the parent of ``sub_dir``.
    The scratch directory is removed even when an assertion fails.
    """
    unitest_dir = '/tmp/magic_pdf/unittest/data/filebased_reader_writer'
    sub_dir = os.path.join(unitest_dir, 'sub')
    abs_fn = os.path.join(unitest_dir, 'abspath.txt')
    payload = b'hello world'

    os.makedirs(sub_dir, exist_ok=True)
    try:
        writer = FileBasedDataWriter(sub_dir)
        reader = FileBasedDataReader(sub_dir)

        # Bare file name: presumably resolved against sub_dir by the
        # reader/writer -- confirm against their implementation.
        writer.write('test.txt', payload)
        assert reader.read('test.txt') == payload

        # Absolute path outside sub_dir must round-trip as well.
        writer.write(abs_fn, payload)
        assert reader.read(abs_fn) == payload
    finally:
        # Clean up unconditionally so a failed assertion does not leave
        # stale files behind; ignore_errors keeps a cleanup problem from
        # masking the real test failure.
        shutil.rmtree(unitest_dir, ignore_errors=True)


# /tests/unittest/test_data/io/__init__.py:
# https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_data/io/__init__.py
# /tests/unittest/test_data/test_dataset.py:

from magic_pdf.data.dataset import ImageDataset, PymuDocDataset


def test_pymudataset():
    """A PymuDocDataset built from PDF bytes has pages and a page height > 100."""
    pdf_path = 'tests/unittest/test_data/assets/pdfs/test_01.pdf'
    with open(pdf_path, 'rb') as pdf_file:
        pdf_bytes = pdf_file.read()

    dataset = PymuDocDataset(pdf_bytes)
    assert len(dataset) > 0

    first_page_info = dataset.get_page(0).get_page_info()
    assert first_page_info.h > 100


def test_imagedataset():
    """An ImageDataset built from PNG bytes has exactly one page, width > 100."""
    png_path = 'tests/unittest/test_data/assets/pngs/test_01.png'
    with open(png_path, 'rb') as png_file:
        png_bytes = png_file.read()

    dataset = ImageDataset(png_bytes)
    assert len(dataset) == 1

    first_page_info = dataset.get_page(0).get_page_info()
    assert first_page_info.w > 100


# /tests/unittest/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf -------------------------------------------------------------------------------- /tests/unittest/test_integrations/test_rag/assets/one_page_with_table_image.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_integrations/test_rag/assets/one_page_with_table_image.pdf -------------------------------------------------------------------------------- /tests/unittest/test_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_model/__init__.py -------------------------------------------------------------------------------- /tests/unittest/test_model/assets/test_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_model/assets/test_01.pdf -------------------------------------------------------------------------------- /tests/unittest/test_model/assets/test_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_model/assets/test_02.pdf -------------------------------------------------------------------------------- /tests/unittest/test_table/assets/table.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_table/assets/table.jpg -------------------------------------------------------------------------------- /tests/unittest/test_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_tools/__init__.py -------------------------------------------------------------------------------- /tests/unittest/test_tools/assets/cli/path/cli_test_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_tools/assets/cli/path/cli_test_01.pdf -------------------------------------------------------------------------------- /tests/unittest/test_tools/assets/cli/path/cli_test_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_tools/assets/cli/path/cli_test_02.pdf -------------------------------------------------------------------------------- /tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf -------------------------------------------------------------------------------- /tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf 
# /tests/unittest/test_tools/assets/common/cli_test_01.pdf:
# https://raw.githubusercontent.com/opendatalab/MinerU/a911c29fbb9fe175a034c7a2f49abd7581088cd6/tests/unittest/test_tools/assets/common/cli_test_01.pdf
# /update_version.py:
import os
import subprocess


def get_version():
    """Return the version component of the tag reported by git.

    Runs ``git describe --tags`` and splits its output on ``-``. When there
    is more than one component and the first one starts with ``magic_pdf``,
    the second component is returned.

    Returns:
        The second ``-``-separated component of the tag, or ``"0.0.0"``
        when git cannot be run, fails, or reports a tag in another format.
        The reason for the fallback is printed to stdout.
    """
    command = ["git", "describe", "--tags"]
    try:
        version = subprocess.check_output(command).decode().strip()
        version_parts = version.split("-")
        if len(version_parts) > 1 and version_parts[0].startswith("magic_pdf"):
            return version_parts[1]
        raise ValueError(
            f"Invalid version tag {version}. "
            "Expected format is magic_pdf-X.Y.Z-released."
        )
    except Exception as e:
        # Deliberate best-effort: any failure (git missing, not a
        # repository, undecodable output, unexpected tag) falls back to a
        # placeholder version instead of aborting the caller.
        print(e)
        return "0.0.0"


def write_version_to_commons(version):
    """Write ``__version__ = "..."`` to ``magic_pdf/libs/version.py``.

    The target path is resolved relative to the directory containing this
    script; an existing file is overwritten.

    Args:
        version: Version string to embed in the generated module.
    """
    commons_path = os.path.join(os.path.dirname(__file__), 'magic_pdf', 'libs', 'version.py')
    with open(commons_path, 'w', encoding='utf-8') as f:
        f.write(f'__version__ = "{version}"\n')


if __name__ == '__main__':
    version_name = get_version()
    write_version_to_commons(version_name)