├── .gitignore ├── .readthedocs.yaml ├── .vscode └── launch.json ├── LICENSE.md ├── README.md ├── README_zh-CN.md ├── assets ├── demo │ ├── PDFs │ │ ├── DDPM.pdf │ │ └── UniMERNet.pdf │ ├── formula_detection │ │ ├── textbook.png │ │ └── wikipedia_sce.png │ ├── formula_recognition │ │ ├── cpe.png │ │ ├── hwe.png │ │ ├── sce.png │ │ └── spe.png │ ├── layout_detection │ │ ├── exam_paper.png │ │ ├── financial_report.png │ │ ├── fuzzy_scan.png │ │ ├── paper.png │ │ ├── slides.png │ │ └── watermark.png │ ├── ocr │ │ ├── ocr_001.png │ │ └── ocr_002.png │ └── table_parsing │ │ └── table_001.png └── readme │ ├── datalab_logo.png │ ├── layout_example.png │ ├── mfd_example.png │ ├── modelscope_logo.png │ ├── pdf-extract-kit_logo.png │ ├── pipeline.png │ ├── table_expamle.png │ └── unimernet_result.jpg ├── configs ├── config.yaml ├── formula_detection.yaml ├── formula_recognition.yaml ├── layout_detection.yaml ├── layout_detection_layoutlmv3.yaml ├── layout_detection_yolo.yaml ├── ocr.yaml └── table_parsing.yaml ├── docs ├── en │ ├── .readthedocs.yaml │ ├── Makefile │ ├── _static │ │ └── image │ │ │ └── logo.png │ ├── algorithm │ │ ├── formula_detection.rst │ │ ├── formula_recognition.rst │ │ ├── layout_detection.rst │ │ ├── ocr.rst │ │ ├── reading_order.rst │ │ └── table_recognition.rst │ ├── conf copy.py │ ├── conf.bak │ ├── conf.py │ ├── evaluation │ │ ├── formula_detection.rst │ │ ├── formula_recognition.rst │ │ ├── layout_detection.rst │ │ ├── ocr.rst │ │ ├── pdf_extract.rst │ │ ├── reading_order.rst │ │ └── table_recognition.rst │ ├── get_started │ │ ├── installation.rst │ │ ├── pretrained_model.rst │ │ └── quickstart.rst │ ├── index.rst │ ├── make.bat │ ├── models │ │ └── supported.md │ ├── notes │ │ └── changelog.md │ ├── project │ │ ├── doc_translate.rst │ │ ├── pdf_extract.rst │ │ └── speed_up.rst │ ├── switch_language.md │ └── task_extend │ │ ├── code.rst │ │ ├── doc.rst │ │ └── evaluation.rst ├── requirements.txt └── zh_cn │ ├── .readthedocs.yaml │ ├── Makefile │ 
├── _build │ ├── doctrees │ │ ├── algorithm │ │ │ ├── formula_detection.doctree │ │ │ ├── formula_recognition.doctree │ │ │ ├── layout_detection.doctree │ │ │ ├── ocr.doctree │ │ │ ├── reading_order.doctree │ │ │ └── table_recognition.doctree │ │ ├── environment.pickle │ │ ├── evaluation │ │ │ ├── formula_detection.doctree │ │ │ ├── formula_recognition.doctree │ │ │ ├── layout_detection.doctree │ │ │ ├── ocr.doctree │ │ │ ├── pdf_extract.doctree │ │ │ ├── reading_order.doctree │ │ │ └── table_recognition.doctree │ │ ├── get_started │ │ │ ├── installation.doctree │ │ │ ├── pretrained_model.doctree │ │ │ └── quickstart.doctree │ │ ├── index.doctree │ │ ├── models │ │ │ └── supported.doctree │ │ ├── notes │ │ │ └── changelog.doctree │ │ ├── project │ │ │ ├── doc_translate.doctree │ │ │ ├── pdf_extract.doctree │ │ │ └── speed_up.doctree │ │ ├── switch_language.doctree │ │ └── task_extend │ │ │ ├── code.doctree │ │ │ ├── doc.doctree │ │ │ └── evaluation.doctree │ └── html │ │ ├── .buildinfo │ │ ├── _images │ │ └── logo.png │ │ ├── _sources │ │ ├── algorithm │ │ │ ├── formula_detection.rst │ │ │ ├── formula_recognition.rst │ │ │ ├── layout_detection.rst │ │ │ ├── ocr.rst │ │ │ ├── reading_order.rst │ │ │ └── table_recognition.rst │ │ ├── evaluation │ │ │ ├── formula_detection.rst │ │ │ ├── formula_recognition.rst │ │ │ ├── layout_detection.rst │ │ │ ├── ocr.rst │ │ │ ├── pdf_extract.rst │ │ │ ├── reading_order.rst │ │ │ └── table_recognition.rst │ │ ├── get_started │ │ │ ├── installation.rst │ │ │ ├── pretrained_model.rst │ │ │ └── quickstart.rst │ │ ├── index.rst │ │ ├── models │ │ │ └── supported.md │ │ ├── notes │ │ │ └── changelog.md │ │ ├── project │ │ │ ├── doc_translate.rst │ │ │ ├── pdf_extract.rst │ │ │ └── speed_up.rst │ │ ├── switch_language.md │ │ └── task_extend │ │ │ ├── code.rst │ │ │ ├── doc.rst │ │ │ └── evaluation.rst │ │ ├── _static │ │ ├── basic.css │ │ ├── check-solid.svg │ │ ├── clipboard.min.js │ │ ├── copy-button.svg │ │ ├── copybutton.css │ │ ├── 
copybutton.js │ │ ├── copybutton_funcs.js │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── images │ │ │ ├── logo_binder.svg │ │ │ ├── logo_colab.png │ │ │ ├── logo_deepnote.svg │ │ │ └── logo_jupyterhub.svg │ │ ├── language_data.js │ │ ├── locales │ │ │ ├── ar │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── bg │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── bn │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── ca │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── cs │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── da │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── de │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── el │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── eo │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── es │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── et │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── fi │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── fr │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── hr │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── id │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── it │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── iw │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── ja │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── ko │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── 
booktheme.po │ │ │ ├── lt │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── lv │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── ml │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── mr │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── ms │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── nl │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── no │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── pl │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── pt │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── ro │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── ru │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── sk │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── sl │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── sr │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── sv │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── ta │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── te │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── tg │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── th │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── tl │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── tr │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── uk │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ 
└── booktheme.po │ │ │ ├── ur │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── vi │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ ├── zh_CN │ │ │ │ └── LC_MESSAGES │ │ │ │ │ ├── booktheme.mo │ │ │ │ │ └── booktheme.po │ │ │ └── zh_TW │ │ │ │ └── LC_MESSAGES │ │ │ │ ├── booktheme.mo │ │ │ │ └── booktheme.po │ │ ├── logo.png │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── sbt-webpack-macros.html │ │ ├── scripts │ │ │ ├── bootstrap.js │ │ │ ├── bootstrap.js.LICENSE.txt │ │ │ ├── bootstrap.js.map │ │ │ ├── pydata-sphinx-theme.js │ │ │ ├── pydata-sphinx-theme.js.map │ │ │ ├── sphinx-book-theme.js │ │ │ └── sphinx-book-theme.js.map │ │ ├── searchtools.js │ │ ├── sphinx_highlight.js │ │ ├── styles │ │ │ ├── bootstrap.css │ │ │ ├── bootstrap.css.map │ │ │ ├── pydata-sphinx-theme.css │ │ │ ├── pydata-sphinx-theme.css.map │ │ │ ├── sphinx-book-theme.css │ │ │ ├── sphinx-book-theme.css.map │ │ │ └── theme.css │ │ ├── translations.js │ │ ├── vendor │ │ │ └── fontawesome │ │ │ │ └── 6.5.2 │ │ │ │ ├── LICENSE.txt │ │ │ │ ├── css │ │ │ │ └── all.min.css │ │ │ │ ├── js │ │ │ │ ├── all.min.js │ │ │ │ └── all.min.js.LICENSE.txt │ │ │ │ └── webfonts │ │ │ │ ├── fa-brands-400.ttf │ │ │ │ ├── fa-brands-400.woff2 │ │ │ │ ├── fa-regular-400.ttf │ │ │ │ ├── fa-regular-400.woff2 │ │ │ │ ├── fa-solid-900.ttf │ │ │ │ ├── fa-solid-900.woff2 │ │ │ │ ├── fa-v4compatibility.ttf │ │ │ │ └── fa-v4compatibility.woff2 │ │ └── webpack-macros.html │ │ ├── algorithm │ │ ├── formula_detection.html │ │ ├── formula_recognition.html │ │ ├── layout_detection.html │ │ ├── ocr.html │ │ ├── reading_order.html │ │ └── table_recognition.html │ │ ├── evaluation │ │ ├── formula_detection.html │ │ ├── formula_recognition.html │ │ ├── layout_detection.html │ │ ├── ocr.html │ │ ├── pdf_extract.html │ │ ├── reading_order.html │ │ └── table_recognition.html │ │ ├── genindex.html │ │ ├── get_started │ │ ├── installation.html │ │ 
├── pretrained_model.html │ │ └── quickstart.html │ │ ├── index.html │ │ ├── models │ │ └── supported.html │ │ ├── notes │ │ └── changelog.html │ │ ├── objects.inv │ │ ├── project │ │ ├── doc_translate.html │ │ ├── pdf_extract.html │ │ └── speed_up.html │ │ ├── search.html │ │ ├── searchindex.js │ │ ├── switch_language.html │ │ └── task_extend │ │ ├── code.html │ │ ├── doc.html │ │ └── evaluation.html │ ├── _static │ └── image │ │ └── logo.png │ ├── algorithm │ ├── formula_detection.rst │ ├── formula_recognition.rst │ ├── layout_detection.rst │ ├── ocr.rst │ ├── reading_order.rst │ └── table_recognition.rst │ ├── conf.py │ ├── evaluation │ ├── formula_detection.rst │ ├── formula_recognition.rst │ ├── layout_detection.rst │ ├── ocr.rst │ ├── pdf_extract.rst │ ├── reading_order.rst │ └── table_recognition.rst │ ├── get_started │ ├── installation.rst │ ├── pretrained_model.rst │ └── quickstart.rst │ ├── index.rst │ ├── make.bat │ ├── models │ └── supported.md │ ├── notes │ └── changelog.md │ ├── project │ ├── doc_translate.rst │ ├── pdf_extract.rst │ └── speed_up.rst │ ├── switch_language.md │ └── task_extend │ ├── code.rst │ ├── doc.rst │ └── evaluation.rst ├── pdf_extract_kit ├── __init__.py ├── configs │ └── unimernet.yaml ├── dataset │ ├── __init__.py │ └── dataset.py ├── registry │ ├── __init__.py │ └── registry.py ├── tasks │ ├── __init__.py │ ├── base_task.py │ ├── formula_detection │ │ ├── __init__.py │ │ ├── models │ │ │ └── yolo.py │ │ └── task.py │ ├── formula_recognition │ │ ├── __init__.py │ │ ├── models │ │ │ └── unimernet.py │ │ └── task.py │ ├── layout_detection │ │ ├── __init__.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── layoutlmv3.py │ │ │ ├── layoutlmv3_util │ │ │ │ ├── backbone.py │ │ │ │ ├── beit.py │ │ │ │ ├── deit.py │ │ │ │ ├── layoutlmft │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── data │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── cord.py │ │ │ │ │ │ ├── data_collator.py │ │ │ │ │ │ ├── funsd.py │ │ │ │ │ │ ├── image_utils.py │ │ │ │ │ │ └── 
xfund.py │ │ │ │ │ └── models │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── layoutlmv3 │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── configuration_layoutlmv3.py │ │ │ │ │ │ ├── modeling_layoutlmv3.py │ │ │ │ │ │ ├── tokenization_layoutlmv3.py │ │ │ │ │ │ └── tokenization_layoutlmv3_fast.py │ │ │ │ ├── layoutlmv3_base_inference.yaml │ │ │ │ ├── model_init.py │ │ │ │ ├── rcnn_vl.py │ │ │ │ └── visualizer.py │ │ │ └── yolo.py │ │ └── task.py │ ├── ocr │ │ ├── __init__.py │ │ ├── models │ │ │ └── paddle_ocr.py │ │ └── task.py │ └── table_parsing │ │ ├── __init__.py │ │ ├── models │ │ └── struct_eqtable.py │ │ └── task.py ├── utils │ ├── __init__.py │ ├── config_loader.py │ ├── data_preprocess.py │ ├── merge_blocks_and_spans.py │ ├── pdf_utils.py │ └── visualization.py └── version.py ├── project └── pdf2markdown │ ├── README.md │ ├── configs │ └── pdf2markdown.yaml │ ├── demo.png │ └── scripts │ ├── pdf2markdown.py │ └── run_project.py ├── pyproject.toml ├── requirements-cpu.txt ├── requirements.txt ├── requirements └── docs.txt └── scripts ├── formula_detection.py ├── formula_recognition.py ├── layout_detection.py ├── ocr.py ├── run_task.py └── table_parsing.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb* 2 | *.ipynb 3 | 4 | # local data 5 | outputs/* 6 | data/* 7 | temp* 8 | test* 9 | 10 | # python 11 | .ipynb_checkpoints 12 | *.ipynb 13 | **/__pycache__/ 14 | 15 | # logs 16 | *.log 17 | *.out 18 | 19 | models/* 20 | 21 | # Sphinx documentation 22 | docs/*/_build/ 23 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.10" 7 | 8 | formats: 9 | - epub 10 | 11 | python: 12 | install: 13 | - requirements: requirements/docs.txt 14 | 15 | sphinx: 16 | configuration: docs/zh_cn/conf.py 17 | 
-------------------------------------------------------------------------------- /assets/demo/PDFs/DDPM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/PDFs/DDPM.pdf -------------------------------------------------------------------------------- /assets/demo/PDFs/UniMERNet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/PDFs/UniMERNet.pdf -------------------------------------------------------------------------------- /assets/demo/formula_detection/textbook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/formula_detection/textbook.png -------------------------------------------------------------------------------- /assets/demo/formula_detection/wikipedia_sce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/formula_detection/wikipedia_sce.png -------------------------------------------------------------------------------- /assets/demo/formula_recognition/cpe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/formula_recognition/cpe.png -------------------------------------------------------------------------------- /assets/demo/formula_recognition/hwe.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/formula_recognition/hwe.png -------------------------------------------------------------------------------- /assets/demo/formula_recognition/sce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/formula_recognition/sce.png -------------------------------------------------------------------------------- /assets/demo/formula_recognition/spe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/formula_recognition/spe.png -------------------------------------------------------------------------------- /assets/demo/layout_detection/exam_paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/layout_detection/exam_paper.png -------------------------------------------------------------------------------- /assets/demo/layout_detection/financial_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/layout_detection/financial_report.png -------------------------------------------------------------------------------- /assets/demo/layout_detection/fuzzy_scan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/layout_detection/fuzzy_scan.png -------------------------------------------------------------------------------- 
/assets/demo/layout_detection/paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/layout_detection/paper.png -------------------------------------------------------------------------------- /assets/demo/layout_detection/slides.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/layout_detection/slides.png -------------------------------------------------------------------------------- /assets/demo/layout_detection/watermark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/layout_detection/watermark.png -------------------------------------------------------------------------------- /assets/demo/ocr/ocr_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/ocr/ocr_001.png -------------------------------------------------------------------------------- /assets/demo/ocr/ocr_002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/ocr/ocr_002.png -------------------------------------------------------------------------------- /assets/demo/table_parsing/table_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/demo/table_parsing/table_001.png 
-------------------------------------------------------------------------------- /assets/readme/datalab_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/readme/datalab_logo.png -------------------------------------------------------------------------------- /assets/readme/layout_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/readme/layout_example.png -------------------------------------------------------------------------------- /assets/readme/mfd_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/readme/mfd_example.png -------------------------------------------------------------------------------- /assets/readme/modelscope_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/readme/modelscope_logo.png -------------------------------------------------------------------------------- /assets/readme/pdf-extract-kit_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/readme/pdf-extract-kit_logo.png -------------------------------------------------------------------------------- /assets/readme/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/readme/pipeline.png 
-------------------------------------------------------------------------------- /assets/readme/table_expamle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/readme/table_expamle.png -------------------------------------------------------------------------------- /assets/readme/unimernet_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/assets/readme/unimernet_result.jpg -------------------------------------------------------------------------------- /configs/config.yaml: -------------------------------------------------------------------------------- 1 | inputs: assets/demo/formula_detection_pdfs 2 | outputs: outputs/formula_detection_pdfs 3 | tasks: 4 | formula_detection: 5 | model: formula_detection_yolo 6 | model_config: 7 | img_size: 1280 8 | conf_thres: 0.25 9 | iou_thres: 0.45 10 | model_path: models/MFD/weights.pt 11 | visualize: True 12 | formula_recognition: 13 | model: formula_recognition_unimernet 14 | model_config: 15 | cfg_path: pdf_extract_kit/configs/unimernet.yaml 16 | model_path: models/MFR/UniMERNet 17 | visualize: True -------------------------------------------------------------------------------- /configs/formula_detection.yaml: -------------------------------------------------------------------------------- 1 | inputs: assets/demo/formula_detection 2 | outputs: outputs/formula_detection 3 | tasks: 4 | formula_detection: 5 | model: formula_detection_yolo 6 | model_config: 7 | img_size: 1280 8 | conf_thres: 0.25 9 | iou_thres: 0.45 10 | batch_size: 1 11 | model_path: models/MFD/YOLO/yolo_v8_ft.pt 12 | visualize: True -------------------------------------------------------------------------------- /configs/formula_recognition.yaml: 
-------------------------------------------------------------------------------- 1 | inputs: assets/demo/formula_recognition 2 | outputs: outputs/formula_recognition 3 | tasks: 4 | formula_recognition: 5 | model: formula_recognition_unimernet 6 | model_config: 7 | cfg_path: pdf_extract_kit/configs/unimernet.yaml 8 | model_path: models/MFR/unimernet_tiny 9 | visualize: False -------------------------------------------------------------------------------- /configs/layout_detection.yaml: -------------------------------------------------------------------------------- 1 | inputs: assets/demo/layout_detection 2 | outputs: outputs/layout_detection 3 | tasks: 4 | layout_detection: 5 | model: layout_detection_yolo 6 | model_config: 7 | img_size: 1024 8 | conf_thres: 0.25 9 | iou_thres: 0.45 10 | model_path: models/Layout/YOLO/doclayout_yolo_ft.pt 11 | visualize: True -------------------------------------------------------------------------------- /configs/layout_detection_layoutlmv3.yaml: -------------------------------------------------------------------------------- 1 | inputs: assets/demo/layout_detection 2 | outputs: outputs/layout_detection 3 | tasks: 4 | layout_detection: 5 | model: layout_detection_layoutlmv3 6 | model_config: 7 | model_path: models/Layout/LayoutLMv3/model_final.pth -------------------------------------------------------------------------------- /configs/layout_detection_yolo.yaml: -------------------------------------------------------------------------------- 1 | inputs: assets/demo/layout_detection 2 | outputs: outputs/layout_detection 3 | tasks: 4 | layout_detection: 5 | model: layout_detection_yolo 6 | model_config: 7 | img_size: 1024 8 | conf_thres: 0.25 9 | iou_thres: 0.45 10 | model_path: models/Layout/YOLO/doclayout_yolo_ft.pt 11 | visualize: True 12 | device: 0 -------------------------------------------------------------------------------- /configs/ocr.yaml: -------------------------------------------------------------------------------- 
1 | inputs: assets/demo/ocr 2 | outputs: outputs/ocr 3 | visualize: True 4 | tasks: 5 | ocr: 6 | model: ocr_ppocr 7 | model_config: 8 | lang: ch 9 | show_log: True 10 | det_model_dir: models/OCR/PaddleOCR/det/ch_PP-OCRv4_det 11 | rec_model_dir: models/OCR/PaddleOCR/rec/ch_PP-OCRv4_rec 12 | det_db_box_thresh: 0.3 -------------------------------------------------------------------------------- /configs/table_parsing.yaml: -------------------------------------------------------------------------------- 1 | inputs: assets/demo/table_parsing 2 | outputs: outputs/table_parsing 3 | tasks: 4 | table_parsing: 5 | model: table_parsing_struct_eqtable 6 | model_config: 7 | model_path: models/TabRec/StructEqTable 8 | max_new_tokens: 1024 9 | max_time: 30 10 | output_format: latex 11 | lmdeploy: False 12 | flash_atten: True -------------------------------------------------------------------------------- /docs/en/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.10" 7 | 8 | formats: 9 | - epub 10 | 11 | python: 12 | install: 13 | - requirements: requirements/docs.txt 14 | 15 | sphinx: 16 | configuration: docs/en/conf.py 17 | -------------------------------------------------------------------------------- /docs/en/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/en/_static/image/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/en/_static/image/logo.png -------------------------------------------------------------------------------- /docs/en/algorithm/formula_recognition.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_formula_recognition: 2 | 3 | ============ 4 | Formula Recognition Algorithm 5 | ============ 6 | 7 | Introduction 8 | ================= 9 | 10 | Formula recognition involves recognizing the content of a given input formula image and converting it to ``LaTeX`` format. 11 | 12 | Model Usage 13 | ================= 14 | 15 | With the environment properly configured, you can run the formula recognition algorithm script by executing ``scripts/formula_recognition.py``. 16 | 17 | .. code:: shell 18 | 19 | $ python scripts/formula_recognition.py --config configs/formula_recognition.yaml 20 | 21 | Model Configuration 22 | ----------------- 23 | 24 | .. 
code:: yaml 25 | 26 | inputs: assets/demo/formula_recognition 27 | outputs: outputs/formula_recognition 28 | tasks: 29 | formula_recognition: 30 | model: formula_recognition_unimernet 31 | model_config: 32 | cfg_path: pdf_extract_kit/configs/unimernet.yaml 33 | model_path: models/MFR/unimernet_tiny 34 | visualize: False 35 | 36 | - inputs/outputs: Define the input file path and the directory for LaTeX prediction results, respectively. 37 | - tasks: Define the task type, currently only containing a formula recognition task. 38 | - model: Define the specific model type: Currently, only the `UniMERNet <https://github.com/opendatalab/UniMERNet>`_ formula recognition model is provided. 39 | - model_config: Define the model configuration. 40 | - cfg_path: Path to the UniMERNet configuration file. 41 | - model_path: Path to the model weights. 42 | - visualize: Whether to visualize the model results. Visualized results will be saved in the outputs directory. 43 | 44 | Support for Diverse Inputs 45 | ----------------- 46 | 47 | The formula recognition script in PDF-Extract-Kit supports ``single formula images`` and ``document images with corresponding formula regions``. 48 | 49 | Viewing Visualization Results 50 | ----------------- 51 | 52 | When the visualize setting in the config file is set to True, ``LaTeX`` prediction results will be saved in the outputs directory. -------------------------------------------------------------------------------- /docs/en/algorithm/reading_order.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_reading_oder: 2 | ============== 3 | Reading Order Algorithm 4 | ============== 5 | 6 | Coming soon. 
-------------------------------------------------------------------------------- /docs/en/evaluation/formula_detection.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | Formula Detection Evaluation 3 | ===================== 4 | 5 | XXX -------------------------------------------------------------------------------- /docs/en/evaluation/formula_recognition.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | Formula Recognition Evaluation 3 | ===================== 4 | 5 | XXX -------------------------------------------------------------------------------- /docs/en/evaluation/layout_detection.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | Layout Detection Evaluation 3 | ===================== 4 | 5 | XXX -------------------------------------------------------------------------------- /docs/en/evaluation/ocr.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | OCR Evaluation 3 | ===================== 4 | 5 | XXX -------------------------------------------------------------------------------- /docs/en/evaluation/pdf_extract.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | PDF Content Extraction Evaluation [End-to-End] 3 | ===================== 4 | 5 | XXX -------------------------------------------------------------------------------- /docs/en/evaluation/reading_order.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | Reading Order Evaluation 3 | ===================== 4 | 5 | XXX -------------------------------------------------------------------------------- /docs/en/evaluation/table_recognition.rst: -------------------------------------------------------------------------------- 1 | 
============================ 2 | Table Recognition Evaluation 3 | ============================ 4 | 5 | XXX 6 | -------------------------------------------------------------------------------- /docs/en/get_started/installation.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | Installation 3 | ================================== 4 | 5 | In this section, we will demonstrate how to install PDF-Extract-Kit. 6 | 7 | Best Practices 8 | ============== 9 | 10 | We recommend following our best practices when installing PDF-Extract-Kit: use a Python 3.10 conda virtual environment for the installation. 11 | 12 | **Step 1.** Create a Python 3.10 virtual environment using conda. 13 | 14 | .. code-block:: console 15 | 16 | $ conda create -n pdf-extract-kit-1.0 python=3.10 -y 17 | $ conda activate pdf-extract-kit-1.0 18 | 19 | **Step 2.** Install the dependencies for PDF-Extract-Kit. 20 | 21 | .. code-block:: console 22 | 23 | $ # For GPU devices 24 | $ pip install -r requirements.txt 25 | $ # For CPU-only devices 26 | $ pip install -r requirements-cpu.txt 27 | 28 | .. note:: 29 | 30 | To keep environment setup simple, ``requirements.txt`` only includes the dependencies needed for the current best models, which are: 31 | 32 | - Layout Detection: YOLO series (YOLOv10, DocLayout-YOLO) 33 | - Formula Detection: YOLO series (YOLOv8) 34 | - Formula Recognition: UniMERNet 35 | - OCR: PaddleOCR 36 | 37 | For other models, such as LayoutLMv3, additional environment setup is required. For details, see :ref:`Layout Detection Algorithms <algorithm_layout_detection>`. 
-------------------------------------------------------------------------------- /docs/en/get_started/quickstart.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | Quick Start 3 | ================================== 4 | 5 | Once the PDF-Extract-Kit environment is set up and the models are downloaded, we can start using PDF-Extract-Kit. 6 | 7 | Layout Detection Example 8 | ======================== 9 | 10 | Layout detection offers several models: ``LayoutLMv3``, ``YOLOv10``, and ``DocLayout-YOLO``. Compared to ``LayoutLMv3``, ``YOLOv10`` is faster. ``DocLayout-YOLO`` is based on YOLOv10 and includes diverse document pre-training and model optimization, offering both speed and high accuracy. 11 | 12 | **1. Using Layout Detection Models** 13 | 14 | .. code-block:: console 15 | 16 | $ python scripts/layout_detection.py --config configs/layout_detection.yaml 17 | 18 | After execution, we can view the detection results in the ``outputs/layout_detection`` directory. 19 | 20 | .. note:: 21 | 22 | The ``layout_detection.yaml`` file sets the input, output, and model configuration. For a more detailed tutorial on layout detection, see :ref:`Layout Detection Algorithm <algorithm_layout_detection>`. 23 | 24 | Formula Detection Example 25 | ========================= 26 | 27 | .. code-block:: console 28 | 29 | $ python scripts/formula_detection.py --config configs/formula_detection.yaml 30 | 31 | After execution, we can view the detection results in the ``outputs/formula_detection`` directory. 32 | 33 | .. note:: 34 | 35 | The ``formula_detection.yaml`` file sets the input, output, and model configuration. For a more detailed tutorial on formula detection, see :ref:`Formula Detection Algorithm <algorithm_formula_detection>`. -------------------------------------------------------------------------------- /docs/en/index.rst: -------------------------------------------------------------------------------- 1 | .. 
PDF-Extract-Kit documentation master file, created by 2 | sphinx-quickstart on Tue Jan 9 16:33:06 2024. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to the PDF-Extract-Kit Documentation 7 | ============================================== 8 | 9 | .. figure:: ./_static/image/logo.png 10 | :align: center 11 | :alt: pdf-extract-kit 12 | :class: no-scaled-link 13 | 14 | .. raw:: html 15 | 16 |

17 | High-Quality Document Parsing Toolkit 18 | 19 |

20 | 21 |

22 | 23 | Star 24 | Watch 25 | Fork 26 |

27 | 28 | 29 | Tutorial 30 | ------------- 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: Getting Started 34 | 35 | get_started/installation.rst 36 | get_started/pretrained_model.rst 37 | get_started/quickstart.rst 38 | 39 | .. toctree:: 40 | :maxdepth: 2 41 | :caption: Core Algorithm Modules 42 | 43 | algorithm/layout_detection.rst 44 | algorithm/formula_detection.rst 45 | algorithm/formula_recognition.rst 46 | algorithm/ocr.rst 47 | algorithm/table_recognition.rst 48 | algorithm/reading_order.rst 49 | 50 | .. toctree:: 51 | :maxdepth: 2 52 | :caption: Task Extensions 53 | 54 | task_extend/code.rst 55 | task_extend/doc.rst 56 | task_extend/evaluation.rst 57 | 58 | .. toctree:: 59 | :maxdepth: 2 60 | :caption: Supported Models 61 | 62 | models/supported.md 63 | 64 | 65 | .. toctree:: 66 | :maxdepth: 2 67 | :caption: Model Performance Evaluation 68 | 69 | evaluation/layout_detection.rst 70 | evaluation/formula_detection.rst 71 | evaluation/formula_recognition.rst 72 | evaluation/ocr.rst 73 | evaluation/table_recognition.rst 74 | evaluation/reading_order.rst 75 | evaluation/pdf_extract.rst 76 | 77 | .. toctree:: 78 | :maxdepth: 2 79 | :caption: PDF Projects 80 | 81 | project/pdf_extract.rst 82 | project/doc_translate.rst 83 | project/speed_up.rst -------------------------------------------------------------------------------- /docs/en/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. 
Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/en/models/supported.md: -------------------------------------------------------------------------------- 1 | # The Supported Models 2 | 3 | -------------------------------------------------------------------------------- /docs/en/notes/changelog.md: -------------------------------------------------------------------------------- 1 | 14 | 15 | # Changelog 16 | 17 | ## v1.0.0 (2024-10-10) 18 | 19 | The PDF-Extract-Kit-1.0 has been refactored with a more streamlined and user-friendly modular design! 🔥🔥🔥 20 | 21 | ## v0.1.0 (2024-07-01) 22 | 23 | Official release of PDF-Extract-Kit! 🔥🔥🔥 24 | 25 | ### Highlights 26 | 27 | - PDF-Extract-Kit-1.0 offers a high-quality layout detection model, DocLayout-YOLO. 
-------------------------------------------------------------------------------- /docs/en/project/doc_translate.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | Document Translation Project 3 | ============================ 4 | 5 | XXXX 6 | XXXX -------------------------------------------------------------------------------- /docs/en/project/speed_up.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | Model Acceleration Project 3 | ========================== 4 | 5 | XXXX 6 | XXXX -------------------------------------------------------------------------------- /docs/en/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /docs/en/task_extend/doc.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | Documentation Supplement 3 | ================================== 4 | 5 | -------------------------------------------------------------------------------- /docs/en/task_extend/evaluation.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | Model Performance Evaluation 3 | ================================== 4 | 5 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | myst-parser 4 | sphinx-copybutton 5 | sphinx-argparse 6 | sphinx-book-theme -------------------------------------------------------------------------------- /docs/zh_cn/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | 
python: "3.10" 7 | 8 | formats: 9 | - epub 10 | 11 | python: 12 | install: 13 | - requirements: requirements/docs.txt 14 | 15 | sphinx: 16 | configuration: docs/zh_cn/conf.py 17 | -------------------------------------------------------------------------------- /docs/zh_cn/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/algorithm/formula_detection.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/algorithm/formula_detection.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/algorithm/formula_recognition.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/algorithm/formula_recognition.doctree -------------------------------------------------------------------------------- 
/docs/zh_cn/_build/doctrees/algorithm/layout_detection.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/algorithm/layout_detection.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/algorithm/ocr.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/algorithm/ocr.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/algorithm/reading_order.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/algorithm/reading_order.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/algorithm/table_recognition.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/algorithm/table_recognition.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/evaluation/formula_detection.doctree: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/evaluation/formula_detection.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/evaluation/formula_recognition.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/evaluation/formula_recognition.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/evaluation/layout_detection.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/evaluation/layout_detection.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/evaluation/ocr.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/evaluation/ocr.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/evaluation/pdf_extract.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/evaluation/pdf_extract.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/evaluation/reading_order.doctree: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/evaluation/reading_order.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/evaluation/table_recognition.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/evaluation/table_recognition.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/get_started/installation.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/get_started/installation.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/get_started/pretrained_model.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/get_started/pretrained_model.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/get_started/quickstart.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/get_started/quickstart.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/index.doctree: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/models/supported.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/models/supported.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/notes/changelog.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/notes/changelog.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/project/doc_translate.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/project/doc_translate.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/project/pdf_extract.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/project/pdf_extract.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/project/speed_up.doctree: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/project/speed_up.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/switch_language.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/switch_language.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/task_extend/code.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/task_extend/code.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/task_extend/doc.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/task_extend/doc.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/doctrees/task_extend/evaluation.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/doctrees/task_extend/evaluation.doctree -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. 
When it is not found, a full rebuild will be done. 3 | config: 5a9946703cacab0ac69c1ce8ab529bd4 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_images/logo.png -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/algorithm/formula_detection.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_formula_detection: 2 | 3 | ==================== 4 | 公式检测算法 5 | ==================== 6 | 7 | 简介 8 | ==================== 9 | 10 | 公式检测是针对给定的输入图像,检测出图像中所有包含公式的位置(包含行内公式和行间公式)。 11 | 12 | .. note:: 13 | 14 | 公式检测实际上属于布局检测子任务,但由于公式检测的复杂性,我们建议使用单独的公式检测模型解耦。 15 | 这样通常使得数据标注更加方便,且公式检测效果也更好。 16 | 17 | 模型使用 18 | ==================== 19 | 20 | 在配置好环境的情况下,直接执行 ``scripts/formula_detection.py`` 即可运行公式检测算法脚本。 21 | 22 | .. code:: shell 23 | 24 | $ python scripts/formula_detection.py --config configs/formula_detection.yaml 25 | 26 | 模型配置 27 | -------------------- 28 | 29 | .. 
code:: yaml 30 | 31 | inputs: assets/demo/formula_detection 32 | outputs: outputs/formula_detection 33 | tasks: 34 | formula_detection: 35 | model: formula_detection_yolo 36 | model_config: 37 | img_size: 1280 38 | conf_thres: 0.25 39 | iou_thres: 0.45 40 | batch_size: 1 41 | model_path: models/MFD/yolov8/weights.pt 42 | visualize: True 43 | 44 | - inputs/outputs: 分别定义输入文件路径和可视化输出目录 45 | - tasks: 定义任务类型,当前只包含一个公式检测任务 46 | - model: 定义具体模型类型: 当前仅提供YOLO公式检测模型 47 | - model_config: 定义模型配置 48 | - img_size: 定义图像长边大小,短边会根据长边等比例缩放 49 | - conf_thres: 定义置信度阈值,仅检测大于该阈值的目标 50 | - iou_thres: 定义IoU阈值,去除重叠度大于该阈值的目标 51 | - batch_size: 定义批量大小,推理时每次同时推理的图像数,一般情况下越大推理速度越快,显卡越好该数值可以设置的越大 52 | - model_path: 模型权重路径 53 | - visualize: 是否对模型结果进行可视化,可视化结果会保存在outputs目录下。 54 | 55 | 多样化输入支持 56 | -------------------- 57 | 58 | PDF-Extract-Kit中的公式检测脚本支持 ``单个图像`` 、 ``只包含图像文件的目录`` 、 ``单个PDF文件`` 、 ``只包含PDF文件的目录`` 等输入形式。 59 | 60 | .. note:: 61 | 62 | 根据自己实际数据形式,修改 ``configs/formula_detection.yaml`` 中 ``inputs`` 的路径即可 63 | - 单个图像: path/to/image 64 | - 图像文件夹: path/to/images 65 | - 单个PDF文件: path/to/pdf 66 | - PDF文件夹: path/to/pdfs 67 | 68 | .. note:: 69 | 70 | 当使用PDF作为输入时,需要将 ``formula_detection.py`` 中的 ``predict_images`` 修改为 ``predict_pdfs`` 。 71 | 72 | 73 | .. code:: python 74 | 75 | # for image detection 76 | detection_results = model_formula_detection.predict_images(input_data, result_path) 77 | 78 | 79 | .. code:: python 80 | 81 | # for pdf detection 82 | detection_results = model_formula_detection.predict_pdfs(input_data, result_path) 83 | 84 | 85 | 可视化结果查看 86 | -------------------- 87 | 88 | 当config文件中 ``visualize`` 设置为 ``True`` 时,可视化结果会保存在 ``outputs/formula_detection`` 目录下。 89 | 90 | .. 
note:: 91 | 92 | 可视化可以方便对模型结果进行分析,但当进行大批量任务时,建议关掉可视化(设置 ``visualize`` 为 ``False`` ),减少内存和磁盘占用。 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/algorithm/formula_recognition.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_formula_recognition: 2 | 3 | ============ 4 | 公式识别算法 5 | ============ 6 | 7 | 简介 8 | ================= 9 | 10 | 公式识别是指给定输入公式图像,识别公式图像内容并转为 ``LaTeX`` 格式。 11 | 12 | 模型使用 13 | ================= 14 | 15 | 在配置好环境的情况下,直接执行 ``scripts/formula_recognition.py`` 即可运行公式识别算法脚本。 16 | 17 | .. code:: shell 18 | 19 | $ python scripts/formula_recognition.py --config configs/formula_recognition.yaml 20 | 21 | 模型配置 22 | ----------------- 23 | 24 | .. code:: yaml 25 | 26 | inputs: assets/demo/formula_recognition 27 | outputs: outputs/formula_recognition 28 | tasks: 29 | formula_recognition: 30 | model: formula_recognition_unimernet 31 | model_config: 32 | cfg_path: pdf_extract_kit/configs/unimernet.yaml 33 | model_path: models/MFR/unimernet_tiny 34 | visualize: False 35 | 36 | - inputs/outputs: 分别定义输入文件路径和LaTeX预测结果目录 37 | - tasks: 定义任务类型,当前只包含一个公式识别任务 38 | - model: 定义具体模型类型: 当前仅提供 `UniMERNet <https://github.com/opendatalab/UniMERNet>`_ 公式识别模型 39 | - model_config: 定义模型配置 40 | - cfg_path: UniMERNet配置文件路径 41 | - model_path: 模型权重路径 42 | - visualize: 是否对模型结果进行可视化,可视化结果会保存在outputs目录下。 43 | 44 | 多样化输入支持 45 | ----------------- 46 | 47 | PDF-Extract-Kit中的公式识别脚本支持 ``单个公式图像`` 、 ``文档图像及对应公式区域`` 两种输入形式。 48 | 49 | 可视化结果查看 50 | ----------------- 51 | 52 | 当config文件中 ``visualize`` 设置为 ``True`` 时, ``LaTeX`` 预测结果会保存在outputs目录下。 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/algorithm/ocr.rst: -------------------------------------------------------------------------------- 1 | .. 
_algorithm_ocr: 2 | ========================== 3 | 光学字符识别(OCR)算法 4 | ========================== 5 | 6 | 简介 7 | ==================== 8 | 9 | 光学字符识别(OCR)是指对图片中的文字块进行检测和识别。 10 | 11 | 12 | 模型使用 13 | ==================== 14 | 15 | 在配置好环境的情况下,直接执行 ``scripts/ocr.py`` 即可运行OCR算法脚本。 16 | 17 | .. code:: shell 18 | 19 | $ python scripts/ocr.py --config configs/ocr.yaml 20 | 21 | 22 | 模型配置 23 | -------------------- 24 | 25 | .. code:: yaml 26 | 27 | inputs: assets/demo/ocr 28 | outputs: outputs/ocr 29 | visualize: True 30 | tasks: 31 | ocr: 32 | model: ocr_ppocr 33 | model_config: 34 | lang: ch 35 | show_log: True 36 | det_model_dir: models/OCR/PaddleOCR/det/ch_PP-OCRv4_det 37 | rec_model_dir: models/OCR/PaddleOCR/rec/ch_PP-OCRv4_rec 38 | det_db_box_thresh: 0.3 39 | 40 | - inputs/outputs: 分别定义输入文件路径和输出路径 41 | - visualize: 是否对模型结果进行可视化,可视化结果会保存在outputs目录下。 42 | - tasks: 定义任务类型,当前只包含一个OCR任务 43 | - model: 定义具体模型类型, 当前仅提供PaddleOCR模型 44 | - model_config: 定义模型配置 45 | - lang: 定义语种,默认语种ch支持中英文文字的检测和识别 46 | - show_log: 是否打印检测识别过程的日志 47 | - det_model_dir: 定义PaddleOCR检测模型的路径,指定路径不存在时,会自动下载模型权重到该路径 48 | - rec_model_dir: 定义PaddleOCR识别模型的路径,指定路径不存在时,会自动下载模型权重到该路径 49 | - det_db_box_thresh: 检测框筛选阈值,置信度低于该阈值的框会被舍弃 50 | 51 | 52 | 多样化输入支持 53 | -------------------- 54 | 55 | PDF-Extract-Kit中的OCR脚本支持 ``单个图像/PDF文件`` 、 ``包含图像/PDF文件的目录`` 等输入形式。 56 | 57 | 58 | 可视化结果查看 59 | -------------------- 60 | 61 | 当config文件中 ``visualize`` 设置为 ``True`` 时,可视化结果会保存在 ``outputs`` 参数指定的目录下。 62 | 63 | .. note:: 64 | 65 | 可视化可以方便对模型结果进行分析,但当进行大批量任务时,建议关掉可视化(设置 ``visualize`` 为 ``False`` ),减少内存和磁盘占用。 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/algorithm/reading_order.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_reading_oder: 2 | ============== 3 | 阅读顺序算法 4 | ============== 5 | 6 | Comming soon. 
-------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/algorithm/table_recognition.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_table_recognition: 2 | ================= 3 | 表格识别算法 4 | ================= 5 | 6 | Coming soon. -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/evaluation/formula_detection.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 公式检测算法评测 3 | ===================== 4 | 5 | XXX -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/evaluation/formula_recognition.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 公式识别算法评测 3 | ===================== 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/evaluation/layout_detection.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 布局检测算法评测 3 | ===================== 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/evaluation/ocr.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | OCR算法评测 3 | ===================== 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/evaluation/pdf_extract.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | PDF内容提取评测【端到端】 3 | ===================== 4 | 5 | Coming soon! 
-------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/evaluation/reading_order.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 阅读顺序算法评测 3 | ===================== 4 | 5 | XXX -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/evaluation/table_recognition.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 表格识别算法评测 3 | ===================== 4 | 5 | Coming soon! 6 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/get_started/installation.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 安装 3 | ================================== 4 | 5 | 本节中,我们将演示如何安装 PDF-Extract-Kit。 6 | 7 | 最佳实践 8 | ======== 9 | 10 | 我们推荐用户参照我们的最佳实践安装 PDF-Extract-Kit。 11 | 推荐使用 Python-3.10 的 conda 虚拟环境安装 PDF-Extract-Kit。 12 | 13 | **步骤 1.** 使用 conda 先构建一个 Python-3.10 的虚拟环境 14 | 15 | .. code-block:: console 16 | 17 | $ conda create -n pdf-extract-kit-1.0 python=3.10 -y 18 | $ conda activate pdf-extract-kit-1.0 19 | 20 | **步骤 2.** 安装 PDF-Extract-Kit 的依赖项 21 | 22 | .. code-block:: console 23 | 24 | $ # 对于GPU设备 25 | $ pip install -r requirements.txt 26 | $ # 对于CPU设备 27 | $ pip install -r requirements-cpu.txt 28 | 29 | .. 
note:: 30 | 31 | 考虑到用户环境配置的便捷性,我们在requirements.txt中只包含当前最好模型需要的环境,目前包含 32 | 33 | - 布局检测:YOLO系列(YOLOv10, DocLayout-YOLO) 34 | - 公式检测:YOLO系列 (YOLOv8) 35 | - 公式识别:UniMERNet 36 | - OCR: PaddleOCR 37 | 38 | 对于其他模型(如LayoutLMv3),需要单独安装环境,具体见\ :ref:`布局检测算法 <algorithm_layout_detection>` -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/get_started/quickstart.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 快速开始 3 | ================================== 4 | 5 | 配置好PDF-Extract-Kit环境,并下载好模型后,我们可以开始使用PDF-Extract-Kit了。 6 | 7 | 8 | 9 | 布局检测示例 10 | ============== 11 | 12 | 布局检测提供了多种模型: ``LayoutLMv3``、 ``YOLOv10``、 ``DocLayout-YOLO``, 相比于 ``LayoutLMv3``, ``YOLOv10`` 速度更快, ``DocLayout-YOLO`` 则是在 ``YOLOv10`` 的基础上进行多样性文档预训练及模型优化,速度快,精度高。 13 | 14 | **1. 使用布局检测模型** 15 | 16 | .. code-block:: console 17 | 18 | $ python scripts/layout_detection.py --config configs/layout_detection.yaml 19 | 20 | 执行完之后,我们可以在 ``outputs/layout_detection`` 目录下查看检测结果。 21 | 22 | .. note:: 23 | 24 | ``layout_detection.yaml`` 设置输入、输出及模型配置,布局检测更详细教程见\ :ref:`布局检测算法 <algorithm_layout_detection>` \ 。 25 | 26 | 27 | 公式检测示例 28 | ============== 29 | 30 | 31 | .. code-block:: console 32 | 33 | $ python scripts/formula_detection.py --config configs/formula_detection.yaml 34 | 35 | 执行完之后,我们可以在 ``outputs/formula_detection`` 目录下查看检测结果。 36 | 37 | .. note:: 38 | 39 | ``formula_detection.yaml`` 设置输入、输出及模型配置,公式检测更详细教程见 \ :ref:`公式检测算法 <algorithm_formula_detection>` \ 。 40 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/index.rst: -------------------------------------------------------------------------------- 1 | .. PDF-Extract-Kit documentation master file, created by 2 | sphinx-quickstart on Tue Jan 9 16:33:06 2024. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 
5 | 6 | 欢迎来到 PDF-Extract-Kit 的中文文档 7 | ============================================== 8 | 9 | .. figure:: ./_static/image/logo.png 10 | :align: center 11 | :alt: pdf-extract-kit 12 | :class: no-scaled-link 13 | 14 | .. raw:: html 15 | 16 |

17 | 高质量文档解析工具箱 18 | 19 |

20 | 21 |

22 | 23 | Star 24 | Watch 25 | Fork 26 |

27 | 28 | 29 | 文档 30 | ------------- 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: 快速上手 34 | 35 | get_started/installation.rst 36 | get_started/pretrained_model.rst 37 | get_started/quickstart.rst 38 | 39 | .. toctree:: 40 | :maxdepth: 2 41 | :caption: 基础算法模块 42 | 43 | algorithm/layout_detection.rst 44 | algorithm/formula_detection.rst 45 | algorithm/formula_recognition.rst 46 | algorithm/ocr.rst 47 | algorithm/table_recognition.rst 48 | algorithm/reading_order.rst 49 | 50 | .. toctree:: 51 | :maxdepth: 2 52 | :caption: 新任务拓展 53 | 54 | task_extend/code.rst 55 | task_extend/doc.rst 56 | task_extend/evaluation.rst 57 | 58 | .. toctree:: 59 | :maxdepth: 2 60 | :caption: 支持的模型列表 61 | 62 | models/supported.md 63 | 64 | 65 | .. toctree:: 66 | :maxdepth: 2 67 | :caption: 模型性能评测 68 | 69 | evaluation/layout_detection.rst 70 | evaluation/formula_detection.rst 71 | evaluation/formula_recognition.rst 72 | evaluation/ocr.rst 73 | evaluation/table_recognition.rst 74 | evaluation/reading_order.rst 75 | evaluation/pdf_extract.rst 76 | 77 | .. toctree:: 78 | :maxdepth: 2 79 | :caption: PDF项目 80 | 81 | project/pdf_extract.rst 82 | project/doc_translate.rst 83 | project/speed_up.rst -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/models/supported.md: -------------------------------------------------------------------------------- 1 | # 已支持的模型 2 | 3 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/notes/changelog.md: -------------------------------------------------------------------------------- 1 | 14 | 15 | # 变更日志 16 | 17 | 18 | ## v0.2.0 (2024.09.30) 19 | 20 | PDF-Extract-Kit 代码重构,模块化设计更加简洁易用! 
🔥🔥🔥 21 | 22 | ## v0.1.0 (2024.07.01) 23 | 24 | PDF-Extract-Kit 正式发布!🔥🔥🔥 25 | 26 | ### 亮点 27 | 28 | - PDF-Extract-Kit提供高质量布局检测模型 DocLayout-YOLO 29 | - PDF-Extract-Kit提供高质量公式检测模型 YOLOv8 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/project/doc_translate.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | 文档翻译项目 3 | ================= 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/project/speed_up.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | 模型加速项目 3 | ================= 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/task_extend/doc.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 文档补充 3 | ================================== 4 | 5 | 在实现新的任务和模块后,需要在文档中补充相关内容,以便用户了解如何使用。 6 | 7 | 具体可以参考布局检测任务使用文档:\ :ref:`布局检测算法 ` 8 | 9 | 10 | 主要补充下述几个部分: 11 | 12 | * 任务简介 13 | * 模型使用方式 14 | * 配置文件解释 15 | * 多样化输入支持(如果有) 16 | * 可视化结果查看 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_sources/task_extend/evaluation.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 模型评测 3 | ================================== 4 | 5 | Coming soon! 
-------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/check-solid.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/copy-button.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/copybutton.css: -------------------------------------------------------------------------------- 1 | /* Copy buttons */ 2 | button.copybtn { 3 | position: absolute; 4 | display: flex; 5 | top: .3em; 6 | right: .3em; 7 | width: 1.7em; 8 | height: 1.7em; 9 | opacity: 0; 10 | transition: opacity 0.3s, border .3s, background-color .3s; 11 | user-select: none; 12 | padding: 0; 13 | border: none; 14 | outline: none; 15 | border-radius: 0.4em; 16 | /* The colors that GitHub uses */ 17 | border: #1b1f2426 1px solid; 18 | background-color: #f6f8fa; 19 | color: #57606a; 20 | } 21 | 22 | button.copybtn.success { 23 | border-color: #22863a; 24 | color: #22863a; 25 | } 26 | 27 | button.copybtn svg { 28 | stroke: currentColor; 29 | width: 1.5em; 30 | height: 1.5em; 31 | padding: 0.1em; 32 | } 33 | 34 | div.highlight { 35 | position: relative; 36 | } 37 | 38 | /* Show the copybutton */ 39 | .highlight:hover button.copybtn, button.copybtn.success { 40 | opacity: 1; 41 | } 42 | 43 | .highlight button.copybtn:hover { 44 | background-color: rgb(235, 235, 235); 45 | } 46 | 47 | .highlight button.copybtn:active { 48 | background-color: rgb(187, 187, 187); 49 | } 50 | 51 | /** 52 | * A minimal CSS-only tooltip copied from: 53 | * https://codepen.io/mildrenben/pen/rVBrpK 54 | * 55 | * To use, write HTML like the following: 56 | * 57 | *

Short

58 | */ 59 | .o-tooltip--left { 60 | position: relative; 61 | } 62 | 63 | .o-tooltip--left:after { 64 | opacity: 0; 65 | visibility: hidden; 66 | position: absolute; 67 | content: attr(data-tooltip); 68 | padding: .2em; 69 | font-size: .8em; 70 | left: -.2em; 71 | background: grey; 72 | color: white; 73 | white-space: nowrap; 74 | z-index: 2; 75 | border-radius: 2px; 76 | transform: translateX(-102%) translateY(0); 77 | transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); 78 | } 79 | 80 | .o-tooltip--left:hover:after { 81 | display: block; 82 | opacity: 1; 83 | visibility: visible; 84 | transform: translateX(-100%) translateY(0); 85 | transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); 86 | transition-delay: .5s; 87 | } 88 | 89 | /* By default the copy button shouldn't show up when printing a page */ 90 | @media print { 91 | button.copybtn { 92 | display: none; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | const DOCUMENTATION_OPTIONS = { 2 | VERSION: '0.1.0', 3 | LANGUAGE: 'zh-CN', 4 | COLLAPSE_INDEX: false, 5 | BUILDER: 'html', 6 | FILE_SUFFIX: '.html', 7 | LINK_SUFFIX: '.html', 8 | HAS_SOURCE: true, 9 | SOURCELINK_SUFFIX: '', 10 | NAVIGATION_WITH_KEYS: false, 11 | SHOW_SEARCH_SUMMARY: true, 12 | ENABLE_SEARCH_SHORTCUTS: true, 13 | }; -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/file.png -------------------------------------------------------------------------------- 
/docs/zh_cn/_build/html/_static/images/logo_binder.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 10 | logo 11 | 12 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/images/logo_colab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/images/logo_colab.png -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/images/logo_deepnote.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/images/logo_jupyterhub.svg: -------------------------------------------------------------------------------- 1 | logo_jupyterhubHub 2 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ar/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ar/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ar/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ar\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr 
"طباعة إلى PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "موضوع بواسطة" 16 | 17 | msgid "Download source file" 18 | msgstr "تنزيل ملف المصدر" 19 | 20 | msgid "open issue" 21 | msgstr "قضية مفتوحة" 22 | 23 | msgid "Contents" 24 | msgstr "محتويات" 25 | 26 | msgid "previous page" 27 | msgstr "الصفحة السابقة" 28 | 29 | msgid "Download notebook file" 30 | msgstr "تنزيل ملف دفتر الملاحظات" 31 | 32 | msgid "Copyright" 33 | msgstr "حقوق النشر" 34 | 35 | msgid "Download this page" 36 | msgstr "قم بتنزيل هذه الصفحة" 37 | 38 | msgid "Source repository" 39 | msgstr "مستودع المصدر" 40 | 41 | msgid "By" 42 | msgstr "بواسطة" 43 | 44 | msgid "repository" 45 | msgstr "مخزن" 46 | 47 | msgid "Last updated on" 48 | msgstr "آخر تحديث في" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "تبديل التنقل" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "موضوع كتاب أبو الهول" 55 | 56 | msgid "suggest edit" 57 | msgstr "أقترح تحرير" 58 | 59 | msgid "Open an issue" 60 | msgstr "افتح قضية" 61 | 62 | msgid "Launch" 63 | msgstr "إطلاق" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "وضع ملء الشاشة" 67 | 68 | msgid "Edit this page" 69 | msgstr "قم بتحرير هذه الصفحة" 70 | 71 | msgid "By the" 72 | msgstr "بواسطة" 73 | 74 | msgid "next page" 75 | msgstr "الصفحة التالية" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/bg/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/bg/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/bg/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | 
"Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: bg\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Печат в PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Тема от" 16 | 17 | msgid "Download source file" 18 | msgstr "Изтеглете изходния файл" 19 | 20 | msgid "open issue" 21 | msgstr "отворен брой" 22 | 23 | msgid "Contents" 24 | msgstr "Съдържание" 25 | 26 | msgid "previous page" 27 | msgstr "предишна страница" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Изтеглете файла на бележника" 31 | 32 | msgid "Copyright" 33 | msgstr "Авторско право" 34 | 35 | msgid "Download this page" 36 | msgstr "Изтеглете тази страница" 37 | 38 | msgid "Source repository" 39 | msgstr "Хранилище на източника" 40 | 41 | msgid "By" 42 | msgstr "От" 43 | 44 | msgid "repository" 45 | msgstr "хранилище" 46 | 47 | msgid "Last updated on" 48 | msgstr "Последна актуализация на" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Превключване на навигацията" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Тема на книгата Sphinx" 55 | 56 | msgid "suggest edit" 57 | msgstr "предложи редактиране" 58 | 59 | msgid "Open an issue" 60 | msgstr "Отворете проблем" 61 | 62 | msgid "Launch" 63 | msgstr "Стартиране" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Режим на цял екран" 67 | 68 | msgid "Edit this page" 69 | msgstr "Редактирайте тази страница" 70 | 71 | msgid "By the" 72 | msgstr "По" 73 | 74 | msgid "next page" 75 | msgstr "Следваща страница" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/bn/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/bn/LC_MESSAGES/booktheme.mo 
-------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/bn/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: bn\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "পিডিএফ প্রিন্ট করুন" 13 | 14 | msgid "Theme by the" 15 | msgstr "থিম দ্বারা" 16 | 17 | msgid "Download source file" 18 | msgstr "উত্স ফাইল ডাউনলোড করুন" 19 | 20 | msgid "open issue" 21 | msgstr "খোলা সমস্যা" 22 | 23 | msgid "previous page" 24 | msgstr "আগের পৃষ্ঠা" 25 | 26 | msgid "Download notebook file" 27 | msgstr "নোটবুক ফাইল ডাউনলোড করুন" 28 | 29 | msgid "Copyright" 30 | msgstr "কপিরাইট" 31 | 32 | msgid "Download this page" 33 | msgstr "এই পৃষ্ঠাটি ডাউনলোড করুন" 34 | 35 | msgid "Source repository" 36 | msgstr "উত্স সংগ্রহস্থল" 37 | 38 | msgid "By" 39 | msgstr "দ্বারা" 40 | 41 | msgid "Last updated on" 42 | msgstr "সর্বশেষ আপডেট" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "নেভিগেশন টগল করুন" 46 | 47 | msgid "Sphinx Book Theme" 48 | msgstr "স্পিনিক্স বুক থিম" 49 | 50 | msgid "Open an issue" 51 | msgstr "একটি সমস্যা খুলুন" 52 | 53 | msgid "Launch" 54 | msgstr "শুরু করা" 55 | 56 | msgid "Edit this page" 57 | msgstr "এই পৃষ্ঠাটি সম্পাদনা করুন" 58 | 59 | msgid "By the" 60 | msgstr "দ্বারা" 61 | 62 | msgid "next page" 63 | msgstr "পরবর্তী পৃষ্ঠা" 64 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ca/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ca/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ca/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ca\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Imprimeix a PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema del" 16 | 17 | msgid "Download source file" 18 | msgstr "Baixeu el fitxer font" 19 | 20 | msgid "open issue" 21 | msgstr "número obert" 22 | 23 | msgid "previous page" 24 | msgstr "Pàgina anterior" 25 | 26 | msgid "Download notebook file" 27 | msgstr "Descarregar fitxer de quadern" 28 | 29 | msgid "Copyright" 30 | msgstr "Copyright" 31 | 32 | msgid "Download this page" 33 | msgstr "Descarregueu aquesta pàgina" 34 | 35 | msgid "Source repository" 36 | msgstr "Dipòsit de fonts" 37 | 38 | msgid "By" 39 | msgstr "Per" 40 | 41 | msgid "Last updated on" 42 | msgstr "Darrera actualització el" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "Commuta la navegació" 46 | 47 | msgid "Sphinx Book Theme" 48 | msgstr "Tema del llibre Esfinx" 49 | 50 | msgid "suggest edit" 51 | msgstr "suggerir edició" 52 | 53 | msgid "Open an issue" 54 | msgstr "Obriu un número" 55 | 56 | msgid "Launch" 57 | msgstr "Llançament" 58 | 59 | msgid "Edit this page" 60 | msgstr "Editeu aquesta pàgina" 61 | 62 | msgid "By the" 63 | msgstr "Per la" 64 | 65 | msgid "next page" 66 | msgstr "pàgina següent" 67 | -------------------------------------------------------------------------------- 
/docs/zh_cn/_build/html/_static/locales/cs/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/cs/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/cs/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: cs\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Tisk do PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Téma od" 16 | 17 | msgid "Download source file" 18 | msgstr "Stáhněte si zdrojový soubor" 19 | 20 | msgid "open issue" 21 | msgstr "otevřené číslo" 22 | 23 | msgid "Contents" 24 | msgstr "Obsah" 25 | 26 | msgid "previous page" 27 | msgstr "předchozí stránka" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Stáhnout soubor poznámkového bloku" 31 | 32 | msgid "Copyright" 33 | msgstr "autorská práva" 34 | 35 | msgid "Download this page" 36 | msgstr "Stáhněte si tuto stránku" 37 | 38 | msgid "Source repository" 39 | msgstr "Zdrojové úložiště" 40 | 41 | msgid "By" 42 | msgstr "Podle" 43 | 44 | msgid "repository" 45 | msgstr "úložiště" 46 | 47 | msgid "Last updated on" 48 | msgstr "Naposledy aktualizováno" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Přepnout navigaci" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Téma knihy Sfinga" 55 | 56 | msgid "suggest edit" 57 | msgstr "navrhnout úpravy" 58 | 59 | msgid "Open an issue" 60 | msgstr "Otevřete problém" 61 | 62 | msgid "Launch" 63 | msgstr "Zahájení" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Režim celé 
obrazovky" 67 | 68 | msgid "Edit this page" 69 | msgstr "Upravit tuto stránku" 70 | 71 | msgid "By the" 72 | msgstr "Podle" 73 | 74 | msgid "next page" 75 | msgstr "další strana" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/da/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/da/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/da/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: da\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Udskriv til PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema af" 16 | 17 | msgid "Download source file" 18 | msgstr "Download kildefil" 19 | 20 | msgid "open issue" 21 | msgstr "åbent nummer" 22 | 23 | msgid "Contents" 24 | msgstr "Indhold" 25 | 26 | msgid "previous page" 27 | msgstr "forrige side" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Download notesbog-fil" 31 | 32 | msgid "Copyright" 33 | msgstr "ophavsret" 34 | 35 | msgid "Download this page" 36 | msgstr "Download denne side" 37 | 38 | msgid "Source repository" 39 | msgstr "Kildelager" 40 | 41 | msgid "By" 42 | msgstr "Ved" 43 | 44 | msgid "repository" 45 | msgstr "lager" 46 | 47 | msgid "Last updated on" 48 | msgstr "Sidst opdateret den" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Skift navigation" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sphinx bogtema" 55 | 56 | msgid 
"suggest edit" 57 | msgstr "foreslå redigering" 58 | 59 | msgid "Open an issue" 60 | msgstr "Åbn et problem" 61 | 62 | msgid "Launch" 63 | msgstr "Start" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Fuldskærmstilstand" 67 | 68 | msgid "Edit this page" 69 | msgstr "Rediger denne side" 70 | 71 | msgid "By the" 72 | msgstr "Ved" 73 | 74 | msgid "next page" 75 | msgstr "Næste side" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/de/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/de/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/de/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: de\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "In PDF drucken" 13 | 14 | msgid "Theme by the" 15 | msgstr "Thema von der" 16 | 17 | msgid "Download source file" 18 | msgstr "Quelldatei herunterladen" 19 | 20 | msgid "open issue" 21 | msgstr "offenes Thema" 22 | 23 | msgid "Contents" 24 | msgstr "Inhalt" 25 | 26 | msgid "previous page" 27 | msgstr "vorherige Seite" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Notebook-Datei herunterladen" 31 | 32 | msgid "Copyright" 33 | msgstr "Urheberrechte ©" 34 | 35 | msgid "Download this page" 36 | msgstr "Laden Sie diese Seite herunter" 37 | 38 | msgid "Source repository" 39 | msgstr "Quell-Repository" 40 | 41 | msgid "By" 42 | msgstr "Durch" 43 | 44 | msgid 
"repository" 45 | msgstr "Repository" 46 | 47 | msgid "Last updated on" 48 | msgstr "Zuletzt aktualisiert am" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Navigation umschalten" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sphinx-Buch-Thema" 55 | 56 | msgid "suggest edit" 57 | msgstr "vorschlagen zu bearbeiten" 58 | 59 | msgid "Open an issue" 60 | msgstr "Öffnen Sie ein Problem" 61 | 62 | msgid "Launch" 63 | msgstr "Starten" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Vollbildmodus" 67 | 68 | msgid "Edit this page" 69 | msgstr "Bearbeite diese Seite" 70 | 71 | msgid "By the" 72 | msgstr "Bis zum" 73 | 74 | msgid "next page" 75 | msgstr "Nächste Seite" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/el/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/el/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/el/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: el\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Εκτύπωση σε PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Θέμα από το" 16 | 17 | msgid "Download source file" 18 | msgstr "Λήψη αρχείου προέλευσης" 19 | 20 | msgid "open issue" 21 | msgstr "ανοιχτό ζήτημα" 22 | 23 | msgid "Contents" 24 | msgstr "Περιεχόμενα" 25 | 26 | msgid "previous page" 27 | msgstr "προηγούμενη σελίδα" 28 | 29 | msgid "Download notebook file" 30 | msgstr 
"Λήψη αρχείου σημειωματάριου" 31 | 32 | msgid "Copyright" 33 | msgstr "Πνευματική ιδιοκτησία" 34 | 35 | msgid "Download this page" 36 | msgstr "Λήψη αυτής της σελίδας" 37 | 38 | msgid "Source repository" 39 | msgstr "Αποθήκη πηγής" 40 | 41 | msgid "By" 42 | msgstr "Με" 43 | 44 | msgid "repository" 45 | msgstr "αποθήκη" 46 | 47 | msgid "Last updated on" 48 | msgstr "Τελευταία ενημέρωση στις" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Εναλλαγή πλοήγησης" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Θέμα βιβλίου Sphinx" 55 | 56 | msgid "suggest edit" 57 | msgstr "προτείνω επεξεργασία" 58 | 59 | msgid "Open an issue" 60 | msgstr "Ανοίξτε ένα ζήτημα" 61 | 62 | msgid "Launch" 63 | msgstr "Εκτόξευση" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "ΛΕΙΤΟΥΡΓΙΑ ΠΛΗΡΟΥΣ ΟΘΟΝΗΣ" 67 | 68 | msgid "Edit this page" 69 | msgstr "Επεξεργαστείτε αυτήν τη σελίδα" 70 | 71 | msgid "By the" 72 | msgstr "Από το" 73 | 74 | msgid "next page" 75 | msgstr "επόμενη σελίδα" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/eo/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/eo/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/eo/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: eo\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Presi al PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Temo de la" 16 | 17 | msgid 
"Download source file" 18 | msgstr "Elŝutu fontodosieron" 19 | 20 | msgid "open issue" 21 | msgstr "malferma numero" 22 | 23 | msgid "Contents" 24 | msgstr "Enhavo" 25 | 26 | msgid "previous page" 27 | msgstr "antaŭa paĝo" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Elŝutu kajeran dosieron" 31 | 32 | msgid "Copyright" 33 | msgstr "Kopirajto" 34 | 35 | msgid "Download this page" 36 | msgstr "Elŝutu ĉi tiun paĝon" 37 | 38 | msgid "Source repository" 39 | msgstr "Fonto-deponejo" 40 | 41 | msgid "By" 42 | msgstr "De" 43 | 44 | msgid "repository" 45 | msgstr "deponejo" 46 | 47 | msgid "Last updated on" 48 | msgstr "Laste ĝisdatigita la" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Ŝalti navigadon" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sfinksa Libro-Temo" 55 | 56 | msgid "suggest edit" 57 | msgstr "sugesti redaktadon" 58 | 59 | msgid "Open an issue" 60 | msgstr "Malfermu numeron" 61 | 62 | msgid "Launch" 63 | msgstr "Lanĉo" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Plenekrana reĝimo" 67 | 68 | msgid "Edit this page" 69 | msgstr "Redaktu ĉi tiun paĝon" 70 | 71 | msgid "By the" 72 | msgstr "Per la" 73 | 74 | msgid "next page" 75 | msgstr "sekva paĝo" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/es/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/es/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/es/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | 
"Content-Transfer-Encoding: 8bit\n" 8 | "Language: es\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Imprimir en PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema por el" 16 | 17 | msgid "Download source file" 18 | msgstr "Descargar archivo fuente" 19 | 20 | msgid "open issue" 21 | msgstr "Tema abierto" 22 | 23 | msgid "Contents" 24 | msgstr "Contenido" 25 | 26 | msgid "previous page" 27 | msgstr "pagina anterior" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Descargar archivo de cuaderno" 31 | 32 | msgid "Copyright" 33 | msgstr "Derechos de autor" 34 | 35 | msgid "Download this page" 36 | msgstr "Descarga esta pagina" 37 | 38 | msgid "Source repository" 39 | msgstr "Repositorio de origen" 40 | 41 | msgid "By" 42 | msgstr "Por" 43 | 44 | msgid "repository" 45 | msgstr "repositorio" 46 | 47 | msgid "Last updated on" 48 | msgstr "Ultima actualización en" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Navegación de palanca" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Tema del libro de la esfinge" 55 | 56 | msgid "suggest edit" 57 | msgstr "sugerir editar" 58 | 59 | msgid "Open an issue" 60 | msgstr "Abrir un problema" 61 | 62 | msgid "Launch" 63 | msgstr "Lanzamiento" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Modo de pantalla completa" 67 | 68 | msgid "Edit this page" 69 | msgstr "Edita esta página" 70 | 71 | msgid "By the" 72 | msgstr "Por el" 73 | 74 | msgid "next page" 75 | msgstr "siguiente página" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/et/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/et/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- 
/docs/zh_cn/_build/html/_static/locales/et/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: et\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Prindi PDF-i" 13 | 14 | msgid "Theme by the" 15 | msgstr "Teema" 16 | 17 | msgid "Download source file" 18 | msgstr "Laadige alla lähtefail" 19 | 20 | msgid "open issue" 21 | msgstr "avatud küsimus" 22 | 23 | msgid "Contents" 24 | msgstr "Sisu" 25 | 26 | msgid "previous page" 27 | msgstr "eelmine leht" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Laadige sülearvuti fail alla" 31 | 32 | msgid "Copyright" 33 | msgstr "Autoriõigus" 34 | 35 | msgid "Download this page" 36 | msgstr "Laadige see leht alla" 37 | 38 | msgid "Source repository" 39 | msgstr "Allikahoidla" 40 | 41 | msgid "By" 42 | msgstr "Kõrval" 43 | 44 | msgid "repository" 45 | msgstr "hoidla" 46 | 47 | msgid "Last updated on" 48 | msgstr "Viimati uuendatud" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Lülita navigeerimine sisse" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sfinksiraamatu teema" 55 | 56 | msgid "suggest edit" 57 | msgstr "soovita muuta" 58 | 59 | msgid "Open an issue" 60 | msgstr "Avage probleem" 61 | 62 | msgid "Launch" 63 | msgstr "Käivitage" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Täisekraanirežiim" 67 | 68 | msgid "Edit this page" 69 | msgstr "Muutke seda lehte" 70 | 71 | msgid "By the" 72 | msgstr "Autor" 73 | 74 | msgid "next page" 75 | msgstr "järgmine leht" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/fi/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/fi/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/fi/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: fi\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Tulosta PDF-tiedostoon" 13 | 14 | msgid "Theme by the" 15 | msgstr "Teeman tekijä" 16 | 17 | msgid "Download source file" 18 | msgstr "Lataa lähdetiedosto" 19 | 20 | msgid "open issue" 21 | msgstr "avoin ongelma" 22 | 23 | msgid "Contents" 24 | msgstr "Sisällys" 25 | 26 | msgid "previous page" 27 | msgstr "Edellinen sivu" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Lataa muistikirjatiedosto" 31 | 32 | msgid "Copyright" 33 | msgstr "Tekijänoikeus" 34 | 35 | msgid "Download this page" 36 | msgstr "Lataa tämä sivu" 37 | 38 | msgid "Source repository" 39 | msgstr "Lähteen arkisto" 40 | 41 | msgid "By" 42 | msgstr "Tekijä" 43 | 44 | msgid "repository" 45 | msgstr "arkisto" 46 | 47 | msgid "Last updated on" 48 | msgstr "Viimeksi päivitetty" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Vaihda navigointia" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sphinx-kirjan teema" 55 | 56 | msgid "suggest edit" 57 | msgstr "ehdottaa muokkausta" 58 | 59 | msgid "Open an issue" 60 | msgstr "Avaa ongelma" 61 | 62 | msgid "Launch" 63 | msgstr "Tuoda markkinoille" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Koko näytön tila" 67 | 68 | msgid "Edit this page" 69 | msgstr "Muokkaa tätä sivua" 70 | 71 | msgid "By the" 72 | msgstr "Mukaan" 73 | 74 | msgid "next page" 75 | 
msgstr "seuraava sivu" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/fr/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/fr/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/fr/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: fr\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Imprimer au format PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Thème par le" 16 | 17 | msgid "Download source file" 18 | msgstr "Télécharger le fichier source" 19 | 20 | msgid "open issue" 21 | msgstr "signaler un problème" 22 | 23 | msgid "Contents" 24 | msgstr "Contenu" 25 | 26 | msgid "previous page" 27 | msgstr "page précédente" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Télécharger le fichier notebook" 31 | 32 | msgid "Copyright" 33 | msgstr "droits d'auteur" 34 | 35 | msgid "Download this page" 36 | msgstr "Téléchargez cette page" 37 | 38 | msgid "Source repository" 39 | msgstr "Dépôt source" 40 | 41 | msgid "By" 42 | msgstr "Par" 43 | 44 | msgid "repository" 45 | msgstr "dépôt" 46 | 47 | msgid "Last updated on" 48 | msgstr "Dernière mise à jour le" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Basculer la navigation" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Thème du livre Sphinx" 55 | 56 | msgid "suggest edit" 57 | msgstr "suggestion de modification" 58 | 59 | msgid "Open an issue" 
60 | msgstr "Ouvrez un problème" 61 | 62 | msgid "Launch" 63 | msgstr "lancement" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Mode plein écran" 67 | 68 | msgid "Edit this page" 69 | msgstr "Modifier cette page" 70 | 71 | msgid "By the" 72 | msgstr "Par le" 73 | 74 | msgid "next page" 75 | msgstr "page suivante" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/hr/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/hr/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/hr/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: hr\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Ispis u PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema autora" 16 | 17 | msgid "Download source file" 18 | msgstr "Preuzmi izvornu datoteku" 19 | 20 | msgid "open issue" 21 | msgstr "otvoreno izdanje" 22 | 23 | msgid "Contents" 24 | msgstr "Sadržaj" 25 | 26 | msgid "previous page" 27 | msgstr "Prethodna stranica" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Preuzmi datoteku bilježnice" 31 | 32 | msgid "Copyright" 33 | msgstr "Autorska prava" 34 | 35 | msgid "Download this page" 36 | msgstr "Preuzmite ovu stranicu" 37 | 38 | msgid "Source repository" 39 | msgstr "Izvorno spremište" 40 | 41 | msgid "By" 42 | msgstr "Po" 43 | 44 | msgid "repository" 45 | msgstr "spremište" 46 | 47 | msgid "Last updated on" 48 | 
msgstr "Posljednje ažuriranje:" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Uključi / isključi navigaciju" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Tema knjige Sphinx" 55 | 56 | msgid "suggest edit" 57 | msgstr "predloži uređivanje" 58 | 59 | msgid "Open an issue" 60 | msgstr "Otvorite izdanje" 61 | 62 | msgid "Launch" 63 | msgstr "Pokrenite" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Način preko cijelog zaslona" 67 | 68 | msgid "Edit this page" 69 | msgstr "Uredite ovu stranicu" 70 | 71 | msgid "By the" 72 | msgstr "Od strane" 73 | 74 | msgid "next page" 75 | msgstr "sljedeća stranica" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/id/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/id/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/id/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: id\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Cetak ke PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema oleh" 16 | 17 | msgid "Download source file" 18 | msgstr "Unduh file sumber" 19 | 20 | msgid "open issue" 21 | msgstr "masalah terbuka" 22 | 23 | msgid "Contents" 24 | msgstr "Isi" 25 | 26 | msgid "previous page" 27 | msgstr "halaman sebelumnya" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Unduh file notebook" 31 | 32 | msgid "Copyright" 33 | msgstr "hak cipta" 34 | 35 
| msgid "Download this page" 36 | msgstr "Unduh halaman ini" 37 | 38 | msgid "Source repository" 39 | msgstr "Repositori sumber" 40 | 41 | msgid "By" 42 | msgstr "Oleh" 43 | 44 | msgid "repository" 45 | msgstr "gudang" 46 | 47 | msgid "Last updated on" 48 | msgstr "Terakhir diperbarui saat" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Alihkan navigasi" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Tema Buku Sphinx" 55 | 56 | msgid "suggest edit" 57 | msgstr "menyarankan edit" 58 | 59 | msgid "Open an issue" 60 | msgstr "Buka masalah" 61 | 62 | msgid "Launch" 63 | msgstr "Meluncurkan" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Mode layar penuh" 67 | 68 | msgid "Edit this page" 69 | msgstr "Edit halaman ini" 70 | 71 | msgid "By the" 72 | msgstr "Oleh" 73 | 74 | msgid "next page" 75 | msgstr "halaman selanjutnya" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/it/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/it/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/it/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: it\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Stampa in PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema di" 16 | 17 | msgid "Download source file" 18 | msgstr "Scarica il file sorgente" 19 | 20 | msgid "open issue" 21 | msgstr "questione aperta" 22 | 23 | msgid 
"Contents" 24 | msgstr "Contenuti" 25 | 26 | msgid "previous page" 27 | msgstr "pagina precedente" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Scarica il file del taccuino" 31 | 32 | msgid "Copyright" 33 | msgstr "Diritto d'autore" 34 | 35 | msgid "Download this page" 36 | msgstr "Scarica questa pagina" 37 | 38 | msgid "Source repository" 39 | msgstr "Repository di origine" 40 | 41 | msgid "By" 42 | msgstr "Di" 43 | 44 | msgid "repository" 45 | msgstr "repository" 46 | 47 | msgid "Last updated on" 48 | msgstr "Ultimo aggiornamento il" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Attiva / disattiva la navigazione" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Tema del libro della Sfinge" 55 | 56 | msgid "suggest edit" 57 | msgstr "suggerisci modifica" 58 | 59 | msgid "Open an issue" 60 | msgstr "Apri un problema" 61 | 62 | msgid "Launch" 63 | msgstr "Lanciare" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Modalità schermo intero" 67 | 68 | msgid "Edit this page" 69 | msgstr "Modifica questa pagina" 70 | 71 | msgid "By the" 72 | msgstr "Dal" 73 | 74 | msgid "next page" 75 | msgstr "pagina successiva" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/iw/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/iw/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/iw/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: iw\n" 9 | 
"Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "הדפס לקובץ PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "נושא מאת" 16 | 17 | msgid "Download source file" 18 | msgstr "הורד את קובץ המקור" 19 | 20 | msgid "open issue" 21 | msgstr "בעיה פתוחה" 22 | 23 | msgid "Contents" 24 | msgstr "תוכן" 25 | 26 | msgid "previous page" 27 | msgstr "עמוד קודם" 28 | 29 | msgid "Download notebook file" 30 | msgstr "הורד קובץ מחברת" 31 | 32 | msgid "Copyright" 33 | msgstr "זכויות יוצרים" 34 | 35 | msgid "Download this page" 36 | msgstr "הורד דף זה" 37 | 38 | msgid "Source repository" 39 | msgstr "מאגר המקורות" 40 | 41 | msgid "By" 42 | msgstr "על ידי" 43 | 44 | msgid "repository" 45 | msgstr "מאגר" 46 | 47 | msgid "Last updated on" 48 | msgstr "עודכן לאחרונה ב" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "החלף ניווט" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "נושא ספר ספינקס" 55 | 56 | msgid "suggest edit" 57 | msgstr "מציע לערוך" 58 | 59 | msgid "Open an issue" 60 | msgstr "פתח גיליון" 61 | 62 | msgid "Launch" 63 | msgstr "לְהַשִׁיק" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "מצב מסך מלא" 67 | 68 | msgid "Edit this page" 69 | msgstr "ערוך דף זה" 70 | 71 | msgid "By the" 72 | msgstr "דרך" 73 | 74 | msgid "next page" 75 | msgstr "עמוד הבא" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ja/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ja/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ja/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: 
Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ja\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "PDFに印刷" 13 | 14 | msgid "Theme by the" 15 | msgstr "のテーマ" 16 | 17 | msgid "Download source file" 18 | msgstr "ソースファイルをダウンロード" 19 | 20 | msgid "open issue" 21 | msgstr "未解決の問題" 22 | 23 | msgid "Contents" 24 | msgstr "目次" 25 | 26 | msgid "previous page" 27 | msgstr "前のページ" 28 | 29 | msgid "Download notebook file" 30 | msgstr "ノートブックファイルをダウンロード" 31 | 32 | msgid "Copyright" 33 | msgstr "Copyright" 34 | 35 | msgid "Download this page" 36 | msgstr "このページをダウンロード" 37 | 38 | msgid "Source repository" 39 | msgstr "ソースリポジトリ" 40 | 41 | msgid "By" 42 | msgstr "著者" 43 | 44 | msgid "repository" 45 | msgstr "リポジトリ" 46 | 47 | msgid "Last updated on" 48 | msgstr "最終更新日" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "ナビゲーションを切り替え" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "スフィンクスの本のテーマ" 55 | 56 | msgid "suggest edit" 57 | msgstr "編集を提案する" 58 | 59 | msgid "Open an issue" 60 | msgstr "問題を報告" 61 | 62 | msgid "Launch" 63 | msgstr "起動" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "全画面モード" 67 | 68 | msgid "Edit this page" 69 | msgstr "このページを編集" 70 | 71 | msgid "By the" 72 | msgstr "によって" 73 | 74 | msgid "next page" 75 | msgstr "次のページ" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ko/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ko/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ko/LC_MESSAGES/booktheme.po: 
-------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ko\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "PDF로 인쇄" 13 | 14 | msgid "Theme by the" 15 | msgstr "테마별" 16 | 17 | msgid "Download source file" 18 | msgstr "소스 파일 다운로드" 19 | 20 | msgid "open issue" 21 | msgstr "열린 문제" 22 | 23 | msgid "Contents" 24 | msgstr "내용" 25 | 26 | msgid "previous page" 27 | msgstr "이전 페이지" 28 | 29 | msgid "Download notebook file" 30 | msgstr "노트북 파일 다운로드" 31 | 32 | msgid "Copyright" 33 | msgstr "저작권" 34 | 35 | msgid "Download this page" 36 | msgstr "이 페이지 다운로드" 37 | 38 | msgid "Source repository" 39 | msgstr "소스 저장소" 40 | 41 | msgid "By" 42 | msgstr "으로" 43 | 44 | msgid "repository" 45 | msgstr "저장소" 46 | 47 | msgid "Last updated on" 48 | msgstr "마지막 업데이트" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "탐색 전환" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "스핑크스 도서 테마" 55 | 56 | msgid "suggest edit" 57 | msgstr "편집 제안" 58 | 59 | msgid "Open an issue" 60 | msgstr "이슈 열기" 61 | 62 | msgid "Launch" 63 | msgstr "시작하다" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "전체 화면으로보기" 67 | 68 | msgid "Edit this page" 69 | msgstr "이 페이지 편집" 70 | 71 | msgid "By the" 72 | msgstr "에 의해" 73 | 74 | msgid "next page" 75 | msgstr "다음 페이지" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/lt/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/lt/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- 
/docs/zh_cn/_build/html/_static/locales/lt/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: lt\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Spausdinti į PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema" 16 | 17 | msgid "Download source file" 18 | msgstr "Atsisiųsti šaltinio failą" 19 | 20 | msgid "open issue" 21 | msgstr "atviras klausimas" 22 | 23 | msgid "Contents" 24 | msgstr "Turinys" 25 | 26 | msgid "previous page" 27 | msgstr "Ankstesnis puslapis" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Atsisiųsti nešiojamojo kompiuterio failą" 31 | 32 | msgid "Copyright" 33 | msgstr "Autorių teisės" 34 | 35 | msgid "Download this page" 36 | msgstr "Atsisiųskite šį puslapį" 37 | 38 | msgid "Source repository" 39 | msgstr "Šaltinio saugykla" 40 | 41 | msgid "By" 42 | msgstr "Iki" 43 | 44 | msgid "repository" 45 | msgstr "saugykla" 46 | 47 | msgid "Last updated on" 48 | msgstr "Paskutinį kartą atnaujinta" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Perjungti naršymą" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sfinkso knygos tema" 55 | 56 | msgid "suggest edit" 57 | msgstr "pasiūlyti redaguoti" 58 | 59 | msgid "Open an issue" 60 | msgstr "Atidarykite problemą" 61 | 62 | msgid "Launch" 63 | msgstr "Paleiskite" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Pilno ekrano režimas" 67 | 68 | msgid "Edit this page" 69 | msgstr "Redaguoti šį puslapį" 70 | 71 | msgid "By the" 72 | msgstr "Prie" 73 | 74 | msgid "next page" 75 | msgstr "Kitas puslapis" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/lv/LC_MESSAGES/booktheme.mo: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/lv/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/lv/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: lv\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Drukāt PDF formātā" 13 | 14 | msgid "Theme by the" 15 | msgstr "Autora tēma" 16 | 17 | msgid "Download source file" 18 | msgstr "Lejupielādēt avota failu" 19 | 20 | msgid "open issue" 21 | msgstr "atklāts jautājums" 22 | 23 | msgid "Contents" 24 | msgstr "Saturs" 25 | 26 | msgid "previous page" 27 | msgstr "iepriekšējā lapa" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Lejupielādēt piezīmju grāmatiņu" 31 | 32 | msgid "Copyright" 33 | msgstr "Autortiesības" 34 | 35 | msgid "Download this page" 36 | msgstr "Lejupielādējiet šo lapu" 37 | 38 | msgid "Source repository" 39 | msgstr "Avota krātuve" 40 | 41 | msgid "By" 42 | msgstr "Autors" 43 | 44 | msgid "repository" 45 | msgstr "krātuve" 46 | 47 | msgid "Last updated on" 48 | msgstr "Pēdējoreiz atjaunināts" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Pārslēgt navigāciju" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sfinksa grāmatas tēma" 55 | 56 | msgid "suggest edit" 57 | msgstr "ieteikt rediģēt" 58 | 59 | msgid "Open an issue" 60 | msgstr "Atveriet problēmu" 61 | 62 | msgid "Launch" 63 | msgstr "Uzsākt" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Pilnekrāna režīms" 67 | 68 | msgid "Edit this page" 69 | msgstr "Rediģēt šo 
lapu" 70 | 71 | msgid "By the" 72 | msgstr "Ar" 73 | 74 | msgid "next page" 75 | msgstr "nākamā lapaspuse" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ml/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ml/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ml/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ml\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "PDF- ലേക്ക് പ്രിന്റുചെയ്യുക" 13 | 14 | msgid "Theme by the" 15 | msgstr "പ്രമേയം" 16 | 17 | msgid "Download source file" 18 | msgstr "ഉറവിട ഫയൽ ഡൗൺലോഡുചെയ്യുക" 19 | 20 | msgid "open issue" 21 | msgstr "തുറന്ന പ്രശ്നം" 22 | 23 | msgid "previous page" 24 | msgstr "മുൻപത്തെ താൾ" 25 | 26 | msgid "Download notebook file" 27 | msgstr "നോട്ട്ബുക്ക് ഫയൽ ഡൺലോഡ് ചെയ്യുക" 28 | 29 | msgid "Copyright" 30 | msgstr "പകർപ്പവകാശം" 31 | 32 | msgid "Download this page" 33 | msgstr "ഈ പേജ് ഡൗൺലോഡുചെയ്യുക" 34 | 35 | msgid "Source repository" 36 | msgstr "ഉറവിട ശേഖരം" 37 | 38 | msgid "By" 39 | msgstr "എഴുതിയത്" 40 | 41 | msgid "Last updated on" 42 | msgstr "അവസാനം അപ്‌ഡേറ്റുചെയ്‌തത്" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "നാവിഗേഷൻ ടോഗിൾ ചെയ്യുക" 46 | 47 | msgid "Sphinx Book Theme" 48 | msgstr "സ്ഫിങ്ക്സ് പുസ്തക തീം" 49 | 50 | msgid "suggest edit" 51 | msgstr "എഡിറ്റുചെയ്യാൻ നിർദ്ദേശിക്കുക" 52 | 53 | msgid "Open an issue" 54 | msgstr "ഒരു പ്രശ്നം 
തുറക്കുക" 55 | 56 | msgid "Launch" 57 | msgstr "സമാരംഭിക്കുക" 58 | 59 | msgid "Edit this page" 60 | msgstr "ഈ പേജ് എഡിറ്റുചെയ്യുക" 61 | 62 | msgid "By the" 63 | msgstr "എഴുതിയത്" 64 | 65 | msgid "next page" 66 | msgstr "അടുത്ത പേജ്" 67 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/mr/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/mr/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/mr/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: mr\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "पीडीएफवर मुद्रित करा" 13 | 14 | msgid "Theme by the" 15 | msgstr "द्वारा थीम" 16 | 17 | msgid "Download source file" 18 | msgstr "स्त्रोत फाइल डाउनलोड करा" 19 | 20 | msgid "open issue" 21 | msgstr "खुला मुद्दा" 22 | 23 | msgid "previous page" 24 | msgstr "मागील पान" 25 | 26 | msgid "Download notebook file" 27 | msgstr "नोटबुक फाईल डाउनलोड करा" 28 | 29 | msgid "Copyright" 30 | msgstr "कॉपीराइट" 31 | 32 | msgid "Download this page" 33 | msgstr "हे पृष्ठ डाउनलोड करा" 34 | 35 | msgid "Source repository" 36 | msgstr "स्त्रोत भांडार" 37 | 38 | msgid "By" 39 | msgstr "द्वारा" 40 | 41 | msgid "Last updated on" 42 | msgstr "अखेरचे अद्यतनित" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "नेव्हिगेशन टॉगल करा" 46 | 47 | msgid "Sphinx Book Theme" 48 | msgstr "स्फिंक्स बुक थीम" 49 | 50 | msgid "suggest edit" 51 | msgstr 
"संपादन सुचवा" 52 | 53 | msgid "Open an issue" 54 | msgstr "एक मुद्दा उघडा" 55 | 56 | msgid "Launch" 57 | msgstr "लाँच करा" 58 | 59 | msgid "Edit this page" 60 | msgstr "हे पृष्ठ संपादित करा" 61 | 62 | msgid "By the" 63 | msgstr "द्वारा" 64 | 65 | msgid "next page" 66 | msgstr "पुढील पृष्ठ" 67 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ms/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ms/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ms/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ms\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Cetak ke PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema oleh" 16 | 17 | msgid "Download source file" 18 | msgstr "Muat turun fail sumber" 19 | 20 | msgid "open issue" 21 | msgstr "isu terbuka" 22 | 23 | msgid "previous page" 24 | msgstr "halaman sebelumnya" 25 | 26 | msgid "Download notebook file" 27 | msgstr "Muat turun fail buku nota" 28 | 29 | msgid "Copyright" 30 | msgstr "hak cipta" 31 | 32 | msgid "Download this page" 33 | msgstr "Muat turun halaman ini" 34 | 35 | msgid "Source repository" 36 | msgstr "Repositori sumber" 37 | 38 | msgid "By" 39 | msgstr "Oleh" 40 | 41 | msgid "Last updated on" 42 | msgstr "Terakhir dikemas kini pada" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "Togol navigasi" 46 | 47 | msgid "Sphinx Book Theme" 48 | 
msgstr "Tema Buku Sphinx" 49 | 50 | msgid "suggest edit" 51 | msgstr "cadangkan edit" 52 | 53 | msgid "Open an issue" 54 | msgstr "Buka masalah" 55 | 56 | msgid "Launch" 57 | msgstr "Lancarkan" 58 | 59 | msgid "Edit this page" 60 | msgstr "Edit halaman ini" 61 | 62 | msgid "By the" 63 | msgstr "Oleh" 64 | 65 | msgid "next page" 66 | msgstr "muka surat seterusnya" 67 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/nl/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/nl/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/nl/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: nl\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Afdrukken naar pdf" 13 | 14 | msgid "Theme by the" 15 | msgstr "Thema door de" 16 | 17 | msgid "Download source file" 18 | msgstr "Download het bronbestand" 19 | 20 | msgid "open issue" 21 | msgstr "open probleem" 22 | 23 | msgid "Contents" 24 | msgstr "Inhoud" 25 | 26 | msgid "previous page" 27 | msgstr "vorige pagina" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Download notebookbestand" 31 | 32 | msgid "Copyright" 33 | msgstr "auteursrechten" 34 | 35 | msgid "Download this page" 36 | msgstr "Download deze pagina" 37 | 38 | msgid "Source repository" 39 | msgstr "Bronopslagplaats" 40 | 41 | msgid "By" 42 | msgstr "Door" 43 | 44 | msgid "repository" 45 | msgstr 
"repository" 46 | 47 | msgid "Last updated on" 48 | msgstr "Laatst geupdate op" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Schakel navigatie" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sphinx-boekthema" 55 | 56 | msgid "suggest edit" 57 | msgstr "suggereren bewerken" 58 | 59 | msgid "Open an issue" 60 | msgstr "Open een probleem" 61 | 62 | msgid "Launch" 63 | msgstr "Lancering" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Volledig scherm" 67 | 68 | msgid "Edit this page" 69 | msgstr "bewerk deze pagina" 70 | 71 | msgid "By the" 72 | msgstr "Door de" 73 | 74 | msgid "next page" 75 | msgstr "volgende bladzijde" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/no/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/no/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/no/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: no\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Skriv ut til PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema av" 16 | 17 | msgid "Download source file" 18 | msgstr "Last ned kildefilen" 19 | 20 | msgid "open issue" 21 | msgstr "åpent nummer" 22 | 23 | msgid "Contents" 24 | msgstr "Innhold" 25 | 26 | msgid "previous page" 27 | msgstr "forrige side" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Last ned notatbokfilen" 31 | 32 | msgid "Copyright" 33 | 
msgstr "opphavsrett" 34 | 35 | msgid "Download this page" 36 | msgstr "Last ned denne siden" 37 | 38 | msgid "Source repository" 39 | msgstr "Kildedepot" 40 | 41 | msgid "By" 42 | msgstr "Av" 43 | 44 | msgid "repository" 45 | msgstr "oppbevaringssted" 46 | 47 | msgid "Last updated on" 48 | msgstr "Sist oppdatert den" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Bytt navigasjon" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sphinx boktema" 55 | 56 | msgid "suggest edit" 57 | msgstr "foreslå redigering" 58 | 59 | msgid "Open an issue" 60 | msgstr "Åpne et problem" 61 | 62 | msgid "Launch" 63 | msgstr "Start" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Fullskjerm-modus" 67 | 68 | msgid "Edit this page" 69 | msgstr "Rediger denne siden" 70 | 71 | msgid "By the" 72 | msgstr "Ved" 73 | 74 | msgid "next page" 75 | msgstr "neste side" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/pl/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/pl/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/pl/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: pl\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Drukuj do PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Motyw autorstwa" 16 | 17 | msgid "Download source file" 18 | msgstr "Pobierz plik źródłowy" 19 | 20 | msgid "open issue" 21 | msgstr "otwarty 
problem" 22 | 23 | msgid "Contents" 24 | msgstr "Zawartość" 25 | 26 | msgid "previous page" 27 | msgstr "Poprzednia strona" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Pobierz plik notatnika" 31 | 32 | msgid "Copyright" 33 | msgstr "prawa autorskie" 34 | 35 | msgid "Download this page" 36 | msgstr "Pobierz tę stronę" 37 | 38 | msgid "Source repository" 39 | msgstr "Repozytorium źródłowe" 40 | 41 | msgid "By" 42 | msgstr "Przez" 43 | 44 | msgid "repository" 45 | msgstr "magazyn" 46 | 47 | msgid "Last updated on" 48 | msgstr "Ostatnia aktualizacja" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Przełącz nawigację" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Motyw książki Sphinx" 55 | 56 | msgid "suggest edit" 57 | msgstr "zaproponuj edycję" 58 | 59 | msgid "Open an issue" 60 | msgstr "Otwórz problem" 61 | 62 | msgid "Launch" 63 | msgstr "Uruchomić" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Pełny ekran" 67 | 68 | msgid "Edit this page" 69 | msgstr "Edytuj tę strone" 70 | 71 | msgid "By the" 72 | msgstr "Przez" 73 | 74 | msgid "next page" 75 | msgstr "Następna strona" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/pt/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/pt/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/pt/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: pt\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 
1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Imprimir em PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema por" 16 | 17 | msgid "Download source file" 18 | msgstr "Baixar arquivo fonte" 19 | 20 | msgid "open issue" 21 | msgstr "questão aberta" 22 | 23 | msgid "Contents" 24 | msgstr "Conteúdo" 25 | 26 | msgid "previous page" 27 | msgstr "página anterior" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Baixar arquivo de notebook" 31 | 32 | msgid "Copyright" 33 | msgstr "direito autoral" 34 | 35 | msgid "Download this page" 36 | msgstr "Baixe esta página" 37 | 38 | msgid "Source repository" 39 | msgstr "Repositório fonte" 40 | 41 | msgid "By" 42 | msgstr "De" 43 | 44 | msgid "repository" 45 | msgstr "repositório" 46 | 47 | msgid "Last updated on" 48 | msgstr "Última atualização em" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Alternar de navegação" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Tema do livro Sphinx" 55 | 56 | msgid "suggest edit" 57 | msgstr "sugerir edição" 58 | 59 | msgid "Open an issue" 60 | msgstr "Abra um problema" 61 | 62 | msgid "Launch" 63 | msgstr "Lançamento" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Modo tela cheia" 67 | 68 | msgid "Edit this page" 69 | msgstr "Edite essa página" 70 | 71 | msgid "By the" 72 | msgstr "Pelo" 73 | 74 | msgid "next page" 75 | msgstr "próxima página" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ro/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ro/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ro/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | 
msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ro\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Imprimați în PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema de" 16 | 17 | msgid "Download source file" 18 | msgstr "Descărcați fișierul sursă" 19 | 20 | msgid "open issue" 21 | msgstr "problema deschisă" 22 | 23 | msgid "Contents" 24 | msgstr "Cuprins" 25 | 26 | msgid "previous page" 27 | msgstr "pagina anterioară" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Descărcați fișierul notebook" 31 | 32 | msgid "Copyright" 33 | msgstr "Drepturi de autor" 34 | 35 | msgid "Download this page" 36 | msgstr "Descarcă această pagină" 37 | 38 | msgid "Source repository" 39 | msgstr "Depozit sursă" 40 | 41 | msgid "By" 42 | msgstr "De" 43 | 44 | msgid "repository" 45 | msgstr "repertoriu" 46 | 47 | msgid "Last updated on" 48 | msgstr "Ultima actualizare la" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Comutare navigare" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Tema Sphinx Book" 55 | 56 | msgid "suggest edit" 57 | msgstr "sugerează editare" 58 | 59 | msgid "Open an issue" 60 | msgstr "Deschideți o problemă" 61 | 62 | msgid "Launch" 63 | msgstr "Lansa" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Modul ecran întreg" 67 | 68 | msgid "Edit this page" 69 | msgstr "Editați această pagină" 70 | 71 | msgid "By the" 72 | msgstr "Langa" 73 | 74 | msgid "next page" 75 | msgstr "pagina următoare" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ru/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ru/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ru/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ru\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Распечатать в PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Тема от" 16 | 17 | msgid "Download source file" 18 | msgstr "Скачать исходный файл" 19 | 20 | msgid "open issue" 21 | msgstr "открытый вопрос" 22 | 23 | msgid "Contents" 24 | msgstr "Содержание" 25 | 26 | msgid "previous page" 27 | msgstr "Предыдущая страница" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Скачать файл записной книжки" 31 | 32 | msgid "Copyright" 33 | msgstr "авторское право" 34 | 35 | msgid "Download this page" 36 | msgstr "Загрузите эту страницу" 37 | 38 | msgid "Source repository" 39 | msgstr "Исходный репозиторий" 40 | 41 | msgid "By" 42 | msgstr "По" 43 | 44 | msgid "repository" 45 | msgstr "хранилище" 46 | 47 | msgid "Last updated on" 48 | msgstr "Последнее обновление" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Переключить навигацию" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Тема книги Сфинкс" 55 | 56 | msgid "suggest edit" 57 | msgstr "предложить редактировать" 58 | 59 | msgid "Open an issue" 60 | msgstr "Открыть вопрос" 61 | 62 | msgid "Launch" 63 | msgstr "Запуск" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Полноэкранный режим" 67 | 68 | msgid "Edit this page" 69 | msgstr "Редактировать эту страницу" 70 | 71 | msgid "By the" 72 | msgstr "Посредством" 73 | 74 | 
msgid "next page" 75 | msgstr "Следующая страница" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/sk/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/sk/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/sk/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: sk\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Tlač do PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Téma od" 16 | 17 | msgid "Download source file" 18 | msgstr "Stiahnite si zdrojový súbor" 19 | 20 | msgid "open issue" 21 | msgstr "otvorené vydanie" 22 | 23 | msgid "Contents" 24 | msgstr "Obsah" 25 | 26 | msgid "previous page" 27 | msgstr "predchádzajúca strana" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Stiahnite si zošit" 31 | 32 | msgid "Copyright" 33 | msgstr "Autorské práva" 34 | 35 | msgid "Download this page" 36 | msgstr "Stiahnite si túto stránku" 37 | 38 | msgid "Source repository" 39 | msgstr "Zdrojové úložisko" 40 | 41 | msgid "By" 42 | msgstr "Autor:" 43 | 44 | msgid "repository" 45 | msgstr "Úložisko" 46 | 47 | msgid "Last updated on" 48 | msgstr "Posledná aktualizácia dňa" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Prepnúť navigáciu" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Téma knihy Sfinga" 55 | 56 | msgid "suggest edit" 57 | msgstr "navrhnúť úpravu" 58 | 59 | msgid "Open an issue" 60 | 
msgstr "Otvorte problém" 61 | 62 | msgid "Launch" 63 | msgstr "Spustiť" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Režim celej obrazovky" 67 | 68 | msgid "Edit this page" 69 | msgstr "Upraviť túto stránku" 70 | 71 | msgid "By the" 72 | msgstr "Podľa" 73 | 74 | msgid "next page" 75 | msgstr "ďalšia strana" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/sl/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/sl/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/sl/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: sl\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Natisni v PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema avtorja" 16 | 17 | msgid "Download source file" 18 | msgstr "Prenesite izvorno datoteko" 19 | 20 | msgid "open issue" 21 | msgstr "odprto vprašanje" 22 | 23 | msgid "Contents" 24 | msgstr "Vsebina" 25 | 26 | msgid "previous page" 27 | msgstr "Prejšnja stran" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Prenesite datoteko zvezka" 31 | 32 | msgid "Copyright" 33 | msgstr "avtorske pravice" 34 | 35 | msgid "Download this page" 36 | msgstr "Prenesite to stran" 37 | 38 | msgid "Source repository" 39 | msgstr "Izvorno skladišče" 40 | 41 | msgid "By" 42 | msgstr "Avtor" 43 | 44 | msgid "repository" 45 | msgstr "odlagališče" 46 | 47 | msgid "Last updated on" 48 | msgstr 
"Nazadnje posodobljeno dne" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Preklopi navigacijo" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Tema knjige Sphinx" 55 | 56 | msgid "suggest edit" 57 | msgstr "predlagajte urejanje" 58 | 59 | msgid "Open an issue" 60 | msgstr "Odprite številko" 61 | 62 | msgid "Launch" 63 | msgstr "Kosilo" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Celozaslonski način" 67 | 68 | msgid "Edit this page" 69 | msgstr "Uredite to stran" 70 | 71 | msgid "By the" 72 | msgstr "Avtor" 73 | 74 | msgid "next page" 75 | msgstr "Naslednja stran" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/sr/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/sr/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/sr/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: sr\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Испис у ПДФ" 13 | 14 | msgid "Theme by the" 15 | msgstr "Тхеме би" 16 | 17 | msgid "Download source file" 18 | msgstr "Преузми изворну датотеку" 19 | 20 | msgid "open issue" 21 | msgstr "отворено издање" 22 | 23 | msgid "Contents" 24 | msgstr "Садржај" 25 | 26 | msgid "previous page" 27 | msgstr "Претходна страница" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Преузмите датотеку бележнице" 31 | 32 | msgid "Copyright" 33 | msgstr "Ауторско право" 34 | 35 | msgid 
"Download this page" 36 | msgstr "Преузмите ову страницу" 37 | 38 | msgid "Source repository" 39 | msgstr "Изворно спремиште" 40 | 41 | msgid "By" 42 | msgstr "Од стране" 43 | 44 | msgid "repository" 45 | msgstr "спремиште" 46 | 47 | msgid "Last updated on" 48 | msgstr "Последње ажурирање" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Укључи / искључи навигацију" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Тема књиге Спхинк" 55 | 56 | msgid "suggest edit" 57 | msgstr "предложи уређивање" 58 | 59 | msgid "Open an issue" 60 | msgstr "Отворите издање" 61 | 62 | msgid "Launch" 63 | msgstr "Лансирање" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Режим целог екрана" 67 | 68 | msgid "Edit this page" 69 | msgstr "Уредите ову страницу" 70 | 71 | msgid "By the" 72 | msgstr "Од" 73 | 74 | msgid "next page" 75 | msgstr "Следећа страна" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/sv/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/sv/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/sv/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: sv\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Skriv ut till PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema av" 16 | 17 | msgid "Download source file" 18 | msgstr "Ladda ner källfil" 19 | 20 | msgid "open issue" 21 | msgstr "öppna problemrapport" 22 
| 23 | msgid "Contents" 24 | msgstr "Innehåll" 25 | 26 | msgid "previous page" 27 | msgstr "föregående sida" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Ladda ner notebook-fil" 31 | 32 | msgid "Copyright" 33 | msgstr "Upphovsrätt" 34 | 35 | msgid "Download this page" 36 | msgstr "Ladda ner den här sidan" 37 | 38 | msgid "Source repository" 39 | msgstr "Källkodsrepositorium" 40 | 41 | msgid "By" 42 | msgstr "Av" 43 | 44 | msgid "repository" 45 | msgstr "repositorium" 46 | 47 | msgid "Last updated on" 48 | msgstr "Senast uppdaterad den" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Växla navigering" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sphinx Boktema" 55 | 56 | msgid "suggest edit" 57 | msgstr "föreslå ändring" 58 | 59 | msgid "Open an issue" 60 | msgstr "Öppna en problemrapport" 61 | 62 | msgid "Launch" 63 | msgstr "Öppna" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Fullskärmsläge" 67 | 68 | msgid "Edit this page" 69 | msgstr "Redigera den här sidan" 70 | 71 | msgid "By the" 72 | msgstr "Av den" 73 | 74 | msgid "next page" 75 | msgstr "nästa sida" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ta/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ta/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ta/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ta\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 
| msgid "Print to PDF" 12 | msgstr "PDF இல் அச்சிடுக" 13 | 14 | msgid "Theme by the" 15 | msgstr "வழங்கிய தீம்" 16 | 17 | msgid "Download source file" 18 | msgstr "மூல கோப்பைப் பதிவிறக்குக" 19 | 20 | msgid "open issue" 21 | msgstr "திறந்த பிரச்சினை" 22 | 23 | msgid "previous page" 24 | msgstr "முந்தைய பக்கம்" 25 | 26 | msgid "Download notebook file" 27 | msgstr "நோட்புக் கோப்பைப் பதிவிறக்கவும்" 28 | 29 | msgid "Copyright" 30 | msgstr "பதிப்புரிமை" 31 | 32 | msgid "Download this page" 33 | msgstr "இந்தப் பக்கத்தைப் பதிவிறக்கவும்" 34 | 35 | msgid "Source repository" 36 | msgstr "மூல களஞ்சியம்" 37 | 38 | msgid "By" 39 | msgstr "வழங்கியவர்" 40 | 41 | msgid "Last updated on" 42 | msgstr "கடைசியாக புதுப்பிக்கப்பட்டது" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "வழிசெலுத்தலை நிலைமாற்று" 46 | 47 | msgid "Sphinx Book Theme" 48 | msgstr "ஸ்பிங்க்ஸ் புத்தக தீம்" 49 | 50 | msgid "suggest edit" 51 | msgstr "திருத்த பரிந்துரைக்கவும்" 52 | 53 | msgid "Open an issue" 54 | msgstr "சிக்கலைத் திறக்கவும்" 55 | 56 | msgid "Launch" 57 | msgstr "தொடங்க" 58 | 59 | msgid "Edit this page" 60 | msgstr "இந்தப் பக்கத்தைத் திருத்தவும்" 61 | 62 | msgid "By the" 63 | msgstr "மூலம்" 64 | 65 | msgid "next page" 66 | msgstr "அடுத்த பக்கம்" 67 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/te/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/te/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/te/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: 
text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: te\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "PDF కి ముద్రించండి" 13 | 14 | msgid "Theme by the" 15 | msgstr "ద్వారా థీమ్" 16 | 17 | msgid "Download source file" 18 | msgstr "మూల ఫైల్‌ను డౌన్‌లోడ్ చేయండి" 19 | 20 | msgid "open issue" 21 | msgstr "ఓపెన్ ఇష్యూ" 22 | 23 | msgid "previous page" 24 | msgstr "ముందు పేజి" 25 | 26 | msgid "Download notebook file" 27 | msgstr "నోట్బుక్ ఫైల్ను డౌన్లోడ్ చేయండి" 28 | 29 | msgid "Copyright" 30 | msgstr "కాపీరైట్" 31 | 32 | msgid "Download this page" 33 | msgstr "ఈ పేజీని డౌన్‌లోడ్ చేయండి" 34 | 35 | msgid "Source repository" 36 | msgstr "మూల రిపోజిటరీ" 37 | 38 | msgid "By" 39 | msgstr "ద్వారా" 40 | 41 | msgid "Last updated on" 42 | msgstr "చివరిగా నవీకరించబడింది" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "నావిగేషన్‌ను టోగుల్ చేయండి" 46 | 47 | msgid "Sphinx Book Theme" 48 | msgstr "సింహిక పుస్తక థీమ్" 49 | 50 | msgid "suggest edit" 51 | msgstr "సవరించమని సూచించండి" 52 | 53 | msgid "Open an issue" 54 | msgstr "సమస్యను తెరవండి" 55 | 56 | msgid "Launch" 57 | msgstr "ప్రారంభించండి" 58 | 59 | msgid "Edit this page" 60 | msgstr "ఈ పేజీని సవరించండి" 61 | 62 | msgid "By the" 63 | msgstr "ద్వారా" 64 | 65 | msgid "next page" 66 | msgstr "తరువాతి పేజీ" 67 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/tg/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/tg/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/tg/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | 
msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: tg\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Чоп ба PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Мавзӯъи аз" 16 | 17 | msgid "Download source file" 18 | msgstr "Файли манбаъро зеркашӣ кунед" 19 | 20 | msgid "open issue" 21 | msgstr "барориши кушод" 22 | 23 | msgid "Contents" 24 | msgstr "Мундариҷа" 25 | 26 | msgid "previous page" 27 | msgstr "саҳифаи қаблӣ" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Файли дафтарро зеркашӣ кунед" 31 | 32 | msgid "Copyright" 33 | msgstr "Ҳуқуқи муаллиф" 34 | 35 | msgid "Download this page" 36 | msgstr "Ин саҳифаро зеркашӣ кунед" 37 | 38 | msgid "Source repository" 39 | msgstr "Анбори манбаъ" 40 | 41 | msgid "By" 42 | msgstr "Бо" 43 | 44 | msgid "repository" 45 | msgstr "анбор" 46 | 47 | msgid "Last updated on" 48 | msgstr "Last навсозӣ дар" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Гузаришро иваз кунед" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Сфинкс Мавзӯи китоб" 55 | 56 | msgid "suggest edit" 57 | msgstr "пешниҳод вироиш" 58 | 59 | msgid "Open an issue" 60 | msgstr "Масъаларо кушоед" 61 | 62 | msgid "Launch" 63 | msgstr "Оғоз" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Ҳолати экрани пурра" 67 | 68 | msgid "Edit this page" 69 | msgstr "Ин саҳифаро таҳрир кунед" 70 | 71 | msgid "By the" 72 | msgstr "Бо" 73 | 74 | msgid "next page" 75 | msgstr "саҳифаи оянда" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/th/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/th/LC_MESSAGES/booktheme.mo 
-------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/th/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: th\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "พิมพ์เป็น PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "ธีมโดย" 16 | 17 | msgid "Download source file" 18 | msgstr "ดาวน์โหลดไฟล์ต้นฉบับ" 19 | 20 | msgid "open issue" 21 | msgstr "เปิดปัญหา" 22 | 23 | msgid "Contents" 24 | msgstr "สารบัญ" 25 | 26 | msgid "previous page" 27 | msgstr "หน้าที่แล้ว" 28 | 29 | msgid "Download notebook file" 30 | msgstr "ดาวน์โหลดไฟล์สมุดบันทึก" 31 | 32 | msgid "Copyright" 33 | msgstr "ลิขสิทธิ์" 34 | 35 | msgid "Download this page" 36 | msgstr "ดาวน์โหลดหน้านี้" 37 | 38 | msgid "Source repository" 39 | msgstr "ที่เก็บซอร์ส" 40 | 41 | msgid "By" 42 | msgstr "โดย" 43 | 44 | msgid "repository" 45 | msgstr "ที่เก็บ" 46 | 47 | msgid "Last updated on" 48 | msgstr "ปรับปรุงล่าสุดเมื่อ" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "ไม่ต้องสลับช่องทาง" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "ธีมหนังสือสฟิงซ์" 55 | 56 | msgid "suggest edit" 57 | msgstr "แนะนำแก้ไข" 58 | 59 | msgid "Open an issue" 60 | msgstr "เปิดปัญหา" 61 | 62 | msgid "Launch" 63 | msgstr "เปิด" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "โหมดเต็มหน้าจอ" 67 | 68 | msgid "Edit this page" 69 | msgstr "แก้ไขหน้านี้" 70 | 71 | msgid "By the" 72 | msgstr "โดย" 73 | 74 | msgid "next page" 75 | msgstr "หน้าต่อไป" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/tl/LC_MESSAGES/booktheme.mo: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/tl/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/tl/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: tl\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "I-print sa PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tema ng" 16 | 17 | msgid "Download source file" 18 | msgstr "Mag-download ng file ng pinagmulan" 19 | 20 | msgid "open issue" 21 | msgstr "bukas na isyu" 22 | 23 | msgid "previous page" 24 | msgstr "Nakaraang pahina" 25 | 26 | msgid "Download notebook file" 27 | msgstr "Mag-download ng file ng notebook" 28 | 29 | msgid "Copyright" 30 | msgstr "Copyright" 31 | 32 | msgid "Download this page" 33 | msgstr "I-download ang pahinang ito" 34 | 35 | msgid "Source repository" 36 | msgstr "Pinagmulan ng imbakan" 37 | 38 | msgid "By" 39 | msgstr "Ni" 40 | 41 | msgid "Last updated on" 42 | msgstr "Huling na-update noong" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "I-toggle ang pag-navigate" 46 | 47 | msgid "Sphinx Book Theme" 48 | msgstr "Tema ng Sphinx Book" 49 | 50 | msgid "suggest edit" 51 | msgstr "iminumungkahi i-edit" 52 | 53 | msgid "Open an issue" 54 | msgstr "Magbukas ng isyu" 55 | 56 | msgid "Launch" 57 | msgstr "Ilunsad" 58 | 59 | msgid "Edit this page" 60 | msgstr "I-edit ang pahinang ito" 61 | 62 | msgid "By the" 63 | msgstr "Sa pamamagitan ng" 64 | 65 | msgid "next page" 66 | msgstr "Susunod na pahina" 67 | 
-------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/tr/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/tr/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/tr/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: tr\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "PDF olarak yazdır" 13 | 14 | msgid "Theme by the" 15 | msgstr "Tarafından tema" 16 | 17 | msgid "Download source file" 18 | msgstr "Kaynak dosyayı indirin" 19 | 20 | msgid "open issue" 21 | msgstr "Açık konu" 22 | 23 | msgid "Contents" 24 | msgstr "İçindekiler" 25 | 26 | msgid "previous page" 27 | msgstr "önceki sayfa" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Defter dosyasını indirin" 31 | 32 | msgid "Copyright" 33 | msgstr "Telif hakkı" 34 | 35 | msgid "Download this page" 36 | msgstr "Bu sayfayı indirin" 37 | 38 | msgid "Source repository" 39 | msgstr "Kaynak kod deposu" 40 | 41 | msgid "By" 42 | msgstr "Tarafından" 43 | 44 | msgid "repository" 45 | msgstr "depo" 46 | 47 | msgid "Last updated on" 48 | msgstr "Son güncelleme tarihi" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Gezinmeyi değiştir" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sfenks Kitap Teması" 55 | 56 | msgid "suggest edit" 57 | msgstr "düzenleme öner" 58 | 59 | msgid "Open an issue" 60 | msgstr "Bir sorunu açın" 61 | 62 | msgid "Launch" 63 | msgstr 
"Başlatmak" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Tam ekran modu" 67 | 68 | msgid "Edit this page" 69 | msgstr "Bu sayfayı düzenle" 70 | 71 | msgid "By the" 72 | msgstr "Tarafından" 73 | 74 | msgid "next page" 75 | msgstr "sonraki Sayfa" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/uk/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/uk/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/uk/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: uk\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "Друк у форматі PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Тема від" 16 | 17 | msgid "Download source file" 18 | msgstr "Завантажити вихідний файл" 19 | 20 | msgid "open issue" 21 | msgstr "відкритий випуск" 22 | 23 | msgid "Contents" 24 | msgstr "Зміст" 25 | 26 | msgid "previous page" 27 | msgstr "Попередня сторінка" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Завантажте файл блокнота" 31 | 32 | msgid "Copyright" 33 | msgstr "Авторське право" 34 | 35 | msgid "Download this page" 36 | msgstr "Завантажте цю сторінку" 37 | 38 | msgid "Source repository" 39 | msgstr "Джерело сховища" 40 | 41 | msgid "By" 42 | msgstr "Автор" 43 | 44 | msgid "repository" 45 | msgstr "сховище" 46 | 47 | msgid "Last updated on" 48 | msgstr "Останнє оновлення:" 49 | 50 | msgid "Toggle navigation" 51 | 
msgstr "Переключити навігацію" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Тема книги \"Сфінкс\"" 55 | 56 | msgid "suggest edit" 57 | msgstr "запропонувати редагувати" 58 | 59 | msgid "Open an issue" 60 | msgstr "Відкрийте випуск" 61 | 62 | msgid "Launch" 63 | msgstr "Запуск" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Повноекранний режим" 67 | 68 | msgid "Edit this page" 69 | msgstr "Редагувати цю сторінку" 70 | 71 | msgid "By the" 72 | msgstr "По" 73 | 74 | msgid "next page" 75 | msgstr "Наступна сторінка" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ur/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/ur/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/ur/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: ur\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "پی ڈی ایف پرنٹ کریں" 13 | 14 | msgid "Theme by the" 15 | msgstr "کے ذریعہ تھیم" 16 | 17 | msgid "Download source file" 18 | msgstr "سورس فائل ڈاؤن لوڈ کریں" 19 | 20 | msgid "open issue" 21 | msgstr "کھلا مسئلہ" 22 | 23 | msgid "previous page" 24 | msgstr "سابقہ ​​صفحہ" 25 | 26 | msgid "Download notebook file" 27 | msgstr "نوٹ بک فائل ڈاؤن لوڈ کریں" 28 | 29 | msgid "Copyright" 30 | msgstr "کاپی رائٹ" 31 | 32 | msgid "Download this page" 33 | msgstr "اس صفحے کو ڈاؤن لوڈ کریں" 34 | 35 | msgid "Source repository" 36 | msgstr "ماخذ 
ذخیرہ" 37 | 38 | msgid "By" 39 | msgstr "بذریعہ" 40 | 41 | msgid "Last updated on" 42 | msgstr "آخری بار تازہ کاری ہوئی" 43 | 44 | msgid "Toggle navigation" 45 | msgstr "نیویگیشن ٹوگل کریں" 46 | 47 | msgid "Sphinx Book Theme" 48 | msgstr "سپنکس بک تھیم" 49 | 50 | msgid "suggest edit" 51 | msgstr "ترمیم کی تجویز کریں" 52 | 53 | msgid "Open an issue" 54 | msgstr "ایک مسئلہ کھولیں" 55 | 56 | msgid "Launch" 57 | msgstr "لانچ کریں" 58 | 59 | msgid "Edit this page" 60 | msgstr "اس صفحے میں ترمیم کریں" 61 | 62 | msgid "By the" 63 | msgstr "کی طرف" 64 | 65 | msgid "next page" 66 | msgstr "اگلا صفحہ" 67 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/vi/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/vi/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/vi/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: vi\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "In sang PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "Chủ đề của" 16 | 17 | msgid "Download source file" 18 | msgstr "Tải xuống tệp nguồn" 19 | 20 | msgid "open issue" 21 | msgstr "vấn đề mở" 22 | 23 | msgid "Contents" 24 | msgstr "Nội dung" 25 | 26 | msgid "previous page" 27 | msgstr "trang trước" 28 | 29 | msgid "Download notebook file" 30 | msgstr "Tải xuống tệp sổ tay" 31 | 32 | msgid "Copyright" 33 | msgstr "Bản quyền" 34 | 35 | msgid "Download 
this page" 36 | msgstr "Tải xuống trang này" 37 | 38 | msgid "Source repository" 39 | msgstr "Kho nguồn" 40 | 41 | msgid "By" 42 | msgstr "Bởi" 43 | 44 | msgid "repository" 45 | msgstr "kho" 46 | 47 | msgid "Last updated on" 48 | msgstr "Cập nhật lần cuối vào" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "Chuyển đổi điều hướng thành" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Chủ đề sách nhân sư" 55 | 56 | msgid "suggest edit" 57 | msgstr "đề nghị chỉnh sửa" 58 | 59 | msgid "Open an issue" 60 | msgstr "Mở một vấn đề" 61 | 62 | msgid "Launch" 63 | msgstr "Phóng" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "Chế độ toàn màn hình" 67 | 68 | msgid "Edit this page" 69 | msgstr "chỉnh sửa trang này" 70 | 71 | msgid "By the" 72 | msgstr "Bằng" 73 | 74 | msgid "next page" 75 | msgstr "Trang tiếp theo" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/zh_CN/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/zh_CN/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/zh_CN/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: zh_CN\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "列印成 PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "主题作者:" 16 | 17 | msgid "Download source file" 18 | msgstr "下载源文件" 19 | 20 | msgid "open issue" 21 | msgstr "创建议题" 22 | 23 | msgid "Contents" 24 | msgstr "目录" 25 | 26 | msgid 
"previous page" 27 | msgstr "上一页" 28 | 29 | msgid "Download notebook file" 30 | msgstr "下载笔记本文件" 31 | 32 | msgid "Copyright" 33 | msgstr "版权" 34 | 35 | msgid "Download this page" 36 | msgstr "下载此页面" 37 | 38 | msgid "Source repository" 39 | msgstr "源码库" 40 | 41 | msgid "By" 42 | msgstr "作者:" 43 | 44 | msgid "repository" 45 | msgstr "仓库" 46 | 47 | msgid "Last updated on" 48 | msgstr "上次更新时间:" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "显示或隐藏导航栏" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sphinx Book 主题" 55 | 56 | msgid "suggest edit" 57 | msgstr "提出修改建议" 58 | 59 | msgid "Open an issue" 60 | msgstr "创建议题" 61 | 62 | msgid "Launch" 63 | msgstr "启动" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "全屏模式" 67 | 68 | msgid "Edit this page" 69 | msgstr "编辑此页面" 70 | 71 | msgid "By the" 72 | msgstr "作者:" 73 | 74 | msgid "next page" 75 | msgstr "下一页" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/zh_TW/LC_MESSAGES/booktheme.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/locales/zh_TW/LC_MESSAGES/booktheme.mo -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/locales/zh_TW/LC_MESSAGES/booktheme.po: -------------------------------------------------------------------------------- 1 | 2 | msgid "" 3 | msgstr "" 4 | "Project-Id-Version: Sphinx-Book-Theme\n" 5 | "MIME-Version: 1.0\n" 6 | "Content-Type: text/plain; charset=UTF-8\n" 7 | "Content-Transfer-Encoding: 8bit\n" 8 | "Language: zh_TW\n" 9 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 10 | 11 | msgid "Print to PDF" 12 | msgstr "列印成 PDF" 13 | 14 | msgid "Theme by the" 15 | msgstr "佈景主題作者:" 16 | 17 | msgid "Download source file" 18 | msgstr "下載原始檔" 19 | 20 | msgid "open issue" 21 | msgstr "公開的問題" 22 | 23 | msgid 
"Contents" 24 | msgstr "目錄" 25 | 26 | msgid "previous page" 27 | msgstr "上一頁" 28 | 29 | msgid "Download notebook file" 30 | msgstr "下載 Notebook 檔案" 31 | 32 | msgid "Copyright" 33 | msgstr "Copyright" 34 | 35 | msgid "Download this page" 36 | msgstr "下載此頁面" 37 | 38 | msgid "Source repository" 39 | msgstr "來源儲存庫" 40 | 41 | msgid "By" 42 | msgstr "作者:" 43 | 44 | msgid "repository" 45 | msgstr "儲存庫" 46 | 47 | msgid "Last updated on" 48 | msgstr "最後更新時間:" 49 | 50 | msgid "Toggle navigation" 51 | msgstr "顯示或隱藏導覽列" 52 | 53 | msgid "Sphinx Book Theme" 54 | msgstr "Sphinx Book 佈景主題" 55 | 56 | msgid "suggest edit" 57 | msgstr "提出修改建議" 58 | 59 | msgid "Open an issue" 60 | msgstr "開啟議題" 61 | 62 | msgid "Launch" 63 | msgstr "啟動" 64 | 65 | msgid "Fullscreen mode" 66 | msgstr "全螢幕模式" 67 | 68 | msgid "Edit this page" 69 | msgstr "編輯此頁面" 70 | 71 | msgid "By the" 72 | msgstr "作者:" 73 | 74 | msgid "next page" 75 | msgstr "下一頁" 76 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/logo.png -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/plus.png 
-------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/sbt-webpack-macros.html: -------------------------------------------------------------------------------- 1 | 5 | {% macro head_pre_bootstrap() %} 6 | 7 | {% endmacro %} 8 | 9 | {% macro body_post() %} 10 | 11 | {% endmacro %} 12 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/scripts/bootstrap.js.LICENSE.txt: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap v5.3.3 (https://getbootstrap.com/) 3 | * Copyright 2011-2024 The Bootstrap Authors (https://github.com/twbs/bootstrap/graphs/contributors) 4 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) 5 | */ 6 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/scripts/sphinx-book-theme.js: -------------------------------------------------------------------------------- 1 | (()=>{"use strict";var e=e=>{"loading"!=document.readyState?e():document.addEventListener?document.addEventListener("DOMContentLoaded",e):document.attachEvent("onreadystatechange",(function(){"complete"==document.readyState&&e()}))};window.initThebeSBT=()=>{var e=document.querySelector("section h1");e.nextElementSibling.classList.contains("thebe-launch-button")||e.insertAdjacentHTML("afterend",""),initThebe()},window.toggleFullScreen=()=>{var e=document.fullscreenElement&&null!==document.fullscreenElement||document.webkitFullscreenElement&&null!==document.webkitFullscreenElement;let t=document.documentElement;e?(console.log("[SBT]: Exiting full screen"),document.exitFullscreen?document.exitFullscreen():document.webkitExitFullscreen&&document.webkitExitFullscreen()):(console.log("[SBT]: Entering full screen"),t.requestFullscreen?t.requestFullscreen():t.webkitRequestFullscreen&&t.webkitRequestFullscreen())},e((()=>{var e=[];let t=new 
IntersectionObserver(((t,n)=>{t.forEach((t=>{if(t.isIntersecting)e.push(t.target);else for(let n=0;n0?document.querySelector("div.bd-sidebar-secondary").classList.add("hide"):document.querySelector("div.bd-sidebar-secondary").classList.remove("hide")}),{rootMargin:"0px 0px -33% 0px"}),n=[];["marginnote","sidenote","margin","margin-caption","full-width","sidebar","popout"].forEach((e=>{n.push(`.${e}`,`.tag_${e}`,`.${e.replace("-","_")}`,`.tag_${e.replace("-","_")}`)})),document.querySelectorAll(n.join(", ")).forEach((e=>{t.observe(e)})),new IntersectionObserver(((e,t)=>{e[0].boundingClientRect.y<0?document.body.classList.add("scrolled"):document.body.classList.remove("scrolled")})).observe(document.querySelector(".sbt-scroll-pixel-helper"))})),e((function(){var e=[".bd-header-announcement",".bd-header",".bd-header-article",".bd-sidebar-primary",".bd-sidebar-secondary",".bd-footer-article",".bd-footer-content",".bd-footer"].join(",");document.querySelectorAll(e).forEach((e=>{e.classList.add("noprint")}))}))})(); 2 | //# sourceMappingURL=sphinx-book-theme.js.map -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/styles/theme.css: -------------------------------------------------------------------------------- 1 | /* Provided by Sphinx's 'basic' theme, and included in the final set of assets */ 2 | @import "../basic.css"; 3 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/js/all.min.js.LICENSE.txt: -------------------------------------------------------------------------------- 1 | /*! 2 | * Font Awesome Free 6.5.2 by @fontawesome - https://fontawesome.com 3 | * License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) 4 | * Copyright 2024 Fonticons, Inc. 
5 | */ 6 | -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.ttf -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.ttf -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.ttf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.ttf -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-v4compatibility.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-v4compatibility.ttf -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-v4compatibility.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/_static/vendor/fontawesome/6.5.2/webfonts/fa-v4compatibility.woff2 -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/_static/webpack-macros.html: -------------------------------------------------------------------------------- 1 | 5 | {# Load FontAwesome icons #} 6 | {% macro head_pre_icons() %} 7 | 8 | 9 | 10 | 11 | {% endmacro %} 12 | 13 | {% macro head_pre_assets() %} 14 | 15 | 16 | 17 | 18 | {% endmacro %} 19 | 
20 | {% macro head_js_preload() %} 21 | 22 | 23 | 24 | 25 | {% endmacro %} 26 | 27 | {% macro body_post() %} 28 | 29 | 30 | 31 | {% endmacro %} -------------------------------------------------------------------------------- /docs/zh_cn/_build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_build/html/objects.inv -------------------------------------------------------------------------------- /docs/zh_cn/_static/image/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/docs/zh_cn/_static/image/logo.png -------------------------------------------------------------------------------- /docs/zh_cn/algorithm/formula_detection.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_formula_detection: 2 | 3 | ==================== 4 | 公式检测算法 5 | ==================== 6 | 7 | 简介 8 | ==================== 9 | 10 | 公式检测是针对给定的输入图像,检测出图像中所有包含公式的位置(包含行内公式和行间公式)。 11 | 12 | .. note:: 13 | 14 | 公式检测实际上属于布局检测子任务,但由于公式检测的复杂性,我们建议使用单独的公式检测模型解耦。 15 | 这样通常使得数据标注更加方便,且公式检测效果也更好。 16 | 17 | 模型使用 18 | ==================== 19 | 20 | 在配置好环境的情况下,直接执行 ``scripts/formula_detection.py`` 即可运行公式检测算法脚本。 21 | 22 | .. code:: shell 23 | 24 | $ python scripts/formula_detection.py --config configs/formula_detection.yaml 25 | 26 | 模型配置 27 | -------------------- 28 | 29 | .. 
code:: yaml 30 | 31 | inputs: assets/demo/formula_detection 32 | outputs: outputs/formula_detection 33 | tasks: 34 | formula_detection: 35 | model: formula_detection_yolo 36 | model_config: 37 | img_size: 1280 38 | conf_thres: 0.25 39 | iou_thres: 0.45 40 | batch_size: 1 41 | model_path: models/MFD/yolov8/weights.pt 42 | visualize: True 43 | 44 | - inputs/outputs: 分别定义输入文件路径和可视化输出目录 45 | - tasks: 定义任务类型,当前只包含一个公式检测任务 46 | - model: 定义具体模型类型: 当前仅提供YOLO公式检测模型 47 | - model_config: 定义模型配置 48 | - img_size: 定义图像长边大小,短边会根据长边等比例缩放 49 | - conf_thres: 定义置信度阈值,仅检测大于该阈值的目标 50 | - iou_thres: 定义IoU阈值,去除重叠度大于该阈值的目标 51 | - batch_size: 定义批量大小,推理时每次同时推理的图像数,一般情况下越大推理速度越快,显卡越好该数值可以设置的越大 52 | - model_path: 模型权重路径 53 | - visualize: 是否对模型结果进行可视化,可视化结果会保存在outputs目录下。 54 | 55 | 多样化输入支持 56 | -------------------- 57 | 58 | PDF-Extract-Kit中的公式检测脚本支持 ``单个图像`` 、 ``只包含图像文件的目录`` 、 ``单个PDF文件`` 、 ``只包含PDF文件的目录`` 等输入形式。 59 | 60 | .. note:: 61 | 62 | 根据自己实际数据形式,修改 ``configs/formula_detection.yaml`` 中 ``inputs`` 的路径即可 63 | - 单个图像: path/to/image 64 | - 图像文件夹: path/to/images 65 | - 单个PDF文件: path/to/pdf 66 | - PDF文件夹: path/to/pdfs 67 | 68 | .. note:: 69 | 70 | 当使用PDF作为输入时,需要将 ``formula_detection.py`` 中的 ``predict_images`` 修改为 ``predict_pdfs`` 。 71 | 72 | 73 | .. code:: python 74 | 75 | # for image detection 76 | detection_results = model_formula_detection.predict_images(input_data, result_path) 77 | 78 | 79 | .. code:: python 80 | 81 | # for pdf detection 82 | detection_results = model_formula_detection.predict_pdfs(input_data, result_path) 83 | 84 | 85 | 可视化结果查看 86 | -------------------- 87 | 88 | 当config文件中 ``visualize`` 设置为 ``True`` 时,可视化结果会保存在 ``outputs/formula_detection`` 目录下。 89 | 90 | .. note:: 91 | 92 | 可视化可以方便对模型结果进行分析,但当进行大批量任务时,建议关掉可视化(设置 ``visualize`` 为 ``False`` ),减少内存和磁盘占用。 -------------------------------------------------------------------------------- /docs/zh_cn/algorithm/formula_recognition.rst: -------------------------------------------------------------------------------- 1 | .. 
_algorithm_formula_recognition: 2 | 3 | ============ 4 | 公式识别算法 5 | ============ 6 | 7 | 简介 8 | ================= 9 | 10 | 公式识别是指给定输入公式图像,识别公式图像内容并转为 ``LaTeX`` 格式。 11 | 12 | 模型使用 13 | ================= 14 | 15 | 在配置好环境的情况下,直接执行 ``scripts/formula_recognition.py`` 即可运行公式识别算法脚本。 16 | 17 | .. code:: shell 18 | 19 | $ python scripts/formula_recognition.py --config configs/formula_recognition.yaml 20 | 21 | 模型配置 22 | ----------------- 23 | 24 | .. code:: yaml 25 | 26 | inputs: assets/demo/formula_recognition 27 | outputs: outputs/formula_recognition 28 | tasks: 29 | formula_recognition: 30 | model: formula_recognition_unimernet 31 | model_config: 32 | cfg_path: pdf_extract_kit/configs/unimernet.yaml 33 | model_path: models/MFR/unimernet_tiny 34 | visualize: False 35 | 36 | - inputs/outputs: 分别定义输入文件路径和LaTeX预测结果目录 37 | - tasks: 定义任务类型,当前只包含一个公式识别任务 38 | - model: 定义具体模型类型: 当前仅提供 `UniMERNet `_ 公式识别模型 39 | - model_config: 定义模型配置 40 | - cfg_path: UniMERNet配置文件路径 41 | - model_path: 模型权重路径 42 | - visualize: 是否对模型结果进行可视化,可视化结果会保存在outputs目录下。 43 | 44 | 多样化输入支持 45 | ----------------- 46 | 47 | PDF-Extract-Kit中的公式识别脚本支持 ``单个公式图像`` 、 ``文档图像及对应公式区域`` 等输入形式。 48 | 49 | 可视化结果查看 50 | ----------------- 51 | 52 | 当config文件中 ``visualize`` 设置为 ``True`` 时, ``LaTeX`` 预测结果会保存在outputs目录下。 -------------------------------------------------------------------------------- /docs/zh_cn/algorithm/ocr.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_ocr: 2 | ========================== 3 | 光学字符识别(OCR)算法 4 | ========================== 5 | 6 | 简介 7 | ==================== 8 | 9 | 光学字符识别(OCR)是指对图片中的文字块进行检测和识别。 10 | 11 | 12 | 模型使用 13 | ==================== 14 | 15 | 在配置好环境的情况下,直接执行 ``scripts/ocr.py`` 即可运行OCR算法脚本。 16 | 17 | .. code:: shell 18 | 19 | $ python scripts/ocr.py --config configs/ocr.yaml 20 | 21 | 22 | 模型配置 23 | -------------------- 24 | 25 | .. 
code:: yaml 26 | 27 | inputs: assets/demo/ocr 28 | outputs: outputs/ocr 29 | visualize: True 30 | tasks: 31 | ocr: 32 | model: ocr_ppocr 33 | model_config: 34 | lang: ch 35 | show_log: True 36 | det_model_dir: models/OCR/PaddleOCR/det/ch_PP-OCRv4_det 37 | rec_model_dir: models/OCR/PaddleOCR/rec/ch_PP-OCRv4_rec 38 | det_db_box_thresh: 0.3 39 | 40 | - inputs/outputs: 分别定义输入文件路径和输出路径 41 | - visualize: 是否对模型结果进行可视化,可视化结果会保存在outputs目录下。 42 | - tasks: 定义任务类型,当前只包含一个OCR任务 43 | - model: 定义具体模型类型, 当前仅提供PaddleOCR模型 44 | - model_config: 定义模型配置 45 | - lang: 定义语种,默认语种ch支持中英文文字的检测和识别 46 | - show_log: 是否打印检测识别过程的日志 47 | - det_model_dir: 定义PaddleOCR检测模型的路径,指定路径不存在时,会自动下载模型权重到该路径 48 | - rec_model_dir: 定义PaddleOCR识别模型的路径,指定路径不存在时,会自动下载模型权重到该路径 49 | - det_db_box_thresh: 检测框筛选阈值,置信度低于该阈值的框会被舍弃 50 | 51 | 52 | 多样化输入支持 53 | -------------------- 54 | 55 | PDF-Extract-Kit中的OCR脚本支持 ``单个图像/PDF文件`` 、 ``包含图像/PDF文件的目录`` 等输入形式。 56 | 57 | 58 | 可视化结果查看 59 | -------------------- 60 | 61 | 当config文件中 ``visualize`` 设置为 ``True`` 时,可视化结果会保存在 ``outputs`` 参数指定的目录下。 62 | 63 | .. note:: 64 | 65 | 可视化可以方便对模型结果进行分析,但当进行大批量任务时,建议关掉可视化(设置 ``visualize`` 为 ``False`` ),减少内存和磁盘占用。 -------------------------------------------------------------------------------- /docs/zh_cn/algorithm/reading_order.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_reading_oder: 2 | ============== 3 | 阅读顺序算法 4 | ============== 5 | 6 | Coming soon. -------------------------------------------------------------------------------- /docs/zh_cn/algorithm/table_recognition.rst: -------------------------------------------------------------------------------- 1 | .. _algorithm_table_recognition: 2 | 3 | ============ 4 | 表格识别算法 5 | ============ 6 | 7 | 简介 8 | ================= 9 | 10 | 表格识别是指输入表格图像,识别表格结构和内容,并将其转换为 ``LaTeX`` 或 ``HTML`` 等格式。 11 | 12 | 模型使用 13 | ================= 14 | 15 | 在配置好环境的情况下,直接执行 ``scripts/table_parsing.py`` 即可运行表格识别算法脚本。 16 | 17 | .. 
code:: shell 18 | 19 | $ python scripts/table_parsing.py --config configs/table_parsing.yaml 20 | 21 | 模型配置 22 | ----------------- 23 | 24 | .. code:: yaml 25 | 26 | inputs: assets/demo/table_parsing 27 | outputs: outputs/table_parsing 28 | tasks: 29 | table_parsing: 30 | model: table_parsing_struct_eqtable 31 | model_config: 32 | model_path: models/TabRec/StructEqTable 33 | max_new_tokens: 1024 34 | max_time: 30 35 | output_format: latex 36 | lmdeploy: False 37 | flash_attn: True 38 | 39 | - inputs/outputs: 分别定义输入文件路径和表格识别结果目录 40 | - tasks: 定义任务类型,当前只包含一个表格识别任务 41 | - model: 定义具体模型类型: 当前使用 `StructEqTable `_ 表格识别模型 42 | - model_config: 定义模型配置 43 | - model_path: 模型权重路径 44 | - max_new_tokens: 生成的最大token数量, 默认为1024, 最大支持4096 45 | - max_time: 模型运行的最大时间(秒) 46 | - output_format: 输出格式,默认设置为 ``latex``, 可选有 ``html`` 和 ``markdown`` 47 | - lmdeploy: 是否使用 LMDeploy 进行部署,当前设置为 False 48 | - flash_attn: 是否使用flash attention,仅适用于Ampere GPU 49 | 50 | 51 | 多样化输入支持 52 | ----------------- 53 | 54 | PDF-Extract-Kit中的表格识别脚本支持 ``单个表格图像`` 和 ``多个表格图像`` 作为输入。 55 | 56 | .. note:: 57 | 58 | StructEqTable表格模型仅支持GPU设备下运行。 59 | 60 | .. note:: 61 | 62 | 根据表格内容调整 ``max_new_tokens`` 和 ``max_time``, 默认分别为1024和30。 63 | 64 | .. note:: 65 | 66 | lmdeploy为加速推理的选项,如果设置为True,将使用LMDeploy进行加速推理部署。 67 | 使用LMDeploy部署需要安装LMDeploy,安装方法参考 `LMDeploy `_ 。 68 | 69 | -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/formula_detection.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 公式检测算法评测 3 | ===================== 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/formula_recognition.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 公式识别算法评测 3 | ===================== 4 | 5 | Coming soon! 
-------------------------------------------------------------------------------- /docs/zh_cn/evaluation/layout_detection.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 布局检测算法评测 3 | ===================== 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/ocr.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | OCR算法评测 3 | ===================== 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/pdf_extract.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | PDF内容提取评测【端到端】 3 | ===================== 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/reading_order.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 阅读顺序算法评测 3 | ===================== 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/table_recognition.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 表格识别算法评测 3 | ===================== 4 | 5 | Coming soon! 6 | -------------------------------------------------------------------------------- /docs/zh_cn/get_started/installation.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 安装 3 | ================================== 4 | 5 | 本节中,我们将演示如何安装 PDF-Extract-Kit。 6 | 7 | 最佳实践 8 | ======== 9 | 10 | 我们推荐用户参照我们的最佳实践安装 PDF-Extract-Kit。 11 | 推荐使用 Python-3.10 的 conda 虚拟环境安装 PDF-Extract-Kit。 12 | 13 | **步骤 1.** 使用 conda 先构建一个 Python-3.10 的虚拟环境 14 | 15 | .. 
code-block:: console 16 | 17 | $ conda create -n pdf-extract-kit-1.0 python=3.10 -y 18 | $ conda activate pdf-extract-kit-1.0 19 | 20 | **步骤 2.** 安装 PDF-Extract-Kit 的依赖项 21 | 22 | .. code-block:: console 23 | 24 | $ # 对于GPU设备 25 | $ pip install -r requirements.txt 26 | $ # 对于CPU设备 27 | $ pip install -r requirements-cpu.txt 28 | 29 | .. note:: 30 | 31 | 考虑到用户环境配置的便捷性,我们在requirements.txt中只包含当前最好模型需要的环境,目前包含: 32 | 33 | - 布局检测:YOLO系列(YOLOv10, DocLayout-YOLO) 34 | - 公式检测:YOLO系列 (YOLOv8) 35 | - 公式识别:UniMERNet 36 | - OCR: PaddleOCR 37 | 38 | 对于其他模型,如LayoutLMv3,需要单独安装环境,具体见\ :ref:`布局检测算法 <algorithm_layout_detection>` -------------------------------------------------------------------------------- /docs/zh_cn/get_started/quickstart.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 快速开始 3 | ================================== 4 | 5 | 配置好PDF-Extract-Kit环境,并下载好模型后,我们可以开始使用PDF-Extract-Kit了。 6 | 7 | 8 | 9 | 布局检测示例 10 | ============== 11 | 12 | 布局检测提供了多种模型: ``LayoutLMv3``、 ``YOLOv10``、 ``DocLayout-YOLO``, 相比于 ``LayoutLMv3``, ``YOLOv10`` 速度更快, ``DocLayout-YOLO`` 则是在 ``YOLOv10`` 的基础上进行多样性文档预训练及模型优化,速度快,精度高。 13 | 14 | **1. 使用布局检测模型** 15 | 16 | .. code-block:: console 17 | 18 | $ python scripts/layout_detection.py --config configs/layout_detection.yaml 19 | 20 | 执行完之后,我们可以在 ``outputs/layout_detection`` 目录下查看检测结果。 21 | 22 | .. note:: 23 | 24 | ``layout_detection.yaml`` 设置输入、输出及模型配置,布局检测更详细教程见\ :ref:`布局检测算法 <algorithm_layout_detection>` \ 。 25 | 26 | 27 | 公式检测示例 28 | ============== 29 | 30 | 31 | .. code-block:: console 32 | 33 | $ python scripts/formula_detection.py --config configs/formula_detection.yaml 34 | 35 | 执行完之后,我们可以在 ``outputs/formula_detection`` 目录下查看检测结果。 36 | 37 | .. note:: 38 | 39 | ``formula_detection.yaml`` 设置输入、输出及模型配置,公式检测更详细教程见 \ :ref:`公式检测算法 <algorithm_formula_detection>` \ 。 40 | -------------------------------------------------------------------------------- /docs/zh_cn/index.rst: -------------------------------------------------------------------------------- 1 | .. 
PDF-Extract-Kit documentation master file, created by 2 | sphinx-quickstart on Tue Jan 9 16:33:06 2024. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 欢迎来到 PDF-Extract-Kit 的中文文档 7 | ============================================== 8 | 9 | .. figure:: ./_static/image/logo.png 10 | :align: center 11 | :alt: pdf-extract-kit 12 | :class: no-scaled-link 13 | 14 | .. raw:: html 15 | 16 |

17 | 高质量文档解析工具箱 18 | 19 |

20 | 21 |

22 | 23 | Star 24 | Watch 25 | Fork 26 |

27 | 28 | 29 | 文档 30 | ------------- 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: 快速上手 34 | 35 | get_started/installation.rst 36 | get_started/pretrained_model.rst 37 | get_started/quickstart.rst 38 | 39 | .. toctree:: 40 | :maxdepth: 2 41 | :caption: 基础算法模块 42 | 43 | algorithm/layout_detection.rst 44 | algorithm/formula_detection.rst 45 | algorithm/formula_recognition.rst 46 | algorithm/ocr.rst 47 | algorithm/table_recognition.rst 48 | algorithm/reading_order.rst 49 | 50 | .. toctree:: 51 | :maxdepth: 2 52 | :caption: 新任务拓展 53 | 54 | task_extend/code.rst 55 | task_extend/doc.rst 56 | task_extend/evaluation.rst 57 | 58 | .. toctree:: 59 | :maxdepth: 2 60 | :caption: 支持的模型列表 61 | 62 | models/supported.md 63 | 64 | 65 | .. toctree:: 66 | :maxdepth: 2 67 | :caption: 模型性能评测 68 | 69 | evaluation/layout_detection.rst 70 | evaluation/formula_detection.rst 71 | evaluation/formula_recognition.rst 72 | evaluation/ocr.rst 73 | evaluation/table_recognition.rst 74 | evaluation/reading_order.rst 75 | evaluation/pdf_extract.rst 76 | 77 | .. toctree:: 78 | :maxdepth: 2 79 | :caption: PDF项目 80 | 81 | project/pdf_extract.md 82 | project/doc_translate.md 83 | project/speed_up.md -------------------------------------------------------------------------------- /docs/zh_cn/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/zh_cn/models/supported.md: -------------------------------------------------------------------------------- 1 | # 已支持的模型 2 | 3 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/notes/changelog.md: -------------------------------------------------------------------------------- 1 | 14 | 15 | # 变更日志 16 | 17 | 18 | ## v0.2.0 (2024.09.30) 19 | 20 | PDF-Extract-Kit 代码重构,模块化设计更加简洁易用! 🔥🔥🔥 21 | 22 | ## v0.1.0 (2024.07.01) 23 | 24 | PDF-Extract-Kit 正式发布!🔥🔥🔥 25 | 26 | ### 亮点 27 | 28 | - PDF-Extract-Kit提供高质量布局检测模型 DocLayout-YOLO 29 | - PDF-Extract-Kit提供高质量公式检测模型 YOLOv8 -------------------------------------------------------------------------------- /docs/zh_cn/project/doc_translate.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | 文档翻译项目 3 | ================= 4 | 5 | Coming soon! -------------------------------------------------------------------------------- /docs/zh_cn/project/speed_up.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | 模型加速项目 3 | ================= 4 | 5 | Coming soon! 
-------------------------------------------------------------------------------- /docs/zh_cn/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /docs/zh_cn/task_extend/doc.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 文档补充 3 | ================================== 4 | 5 | 在实现新的任务和模块后,需要在文档中补充相关内容,以便用户了解如何使用。 6 | 7 | 具体可以参考布局检测任务使用文档:\ :ref:`布局检测算法 ` 8 | 9 | 10 | 主要补充下述几个部分: 11 | 12 | * 任务简介 13 | * 模型使用方式 14 | * 配置文件解释 15 | * 多样化输入支持(如果有) 16 | * 可视化结果查看 -------------------------------------------------------------------------------- /docs/zh_cn/task_extend/evaluation.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 模型评测 3 | ================================== 4 | 5 | Coming soon!
import os
import sys

# Directory that contains this package's __init__.py, as an absolute path.
current_dir = os.path.abspath(os.path.dirname(__file__))

# Parent of the package directory (the repository root in a source checkout).
root_dir = os.path.normpath(os.path.join(current_dir, os.pardir))

# Put the parent directory at the front of the import search path, once, so
# that `import pdf_extract_kit` resolves when running from a source checkout.
if root_dir not in sys.path:
    sys.path.insert(0, root_dir)
class Registry:
    """Name-to-object lookup table that is filled through a decorator.

    Example:
        >>> registry = Registry()
        >>> @registry.register("demo")
        ... class Demo:
        ...     pass
        >>> registry.get("demo") is Demo
        True
        >>> "demo" in registry
        True
    """

    def __init__(self):
        # Maps the registered name to the registered object.
        self._registry = {}

    def register(self, name):
        """Return a decorator that stores the decorated object under ``name``.

        Args:
            name: Key under which the decorated object is stored.

        Returns:
            A decorator that registers its argument and returns it unchanged.

        Raises:
            ValueError: When the decorator is applied and ``name`` is
                already registered.
        """
        def decorator(item):
            if name in self._registry:
                raise ValueError(f"Item {name} already registered.")
            self._registry[name] = item
            return item
        return decorator

    def get(self, name):
        """Return the object registered under ``name``.

        Raises:
            ValueError: If ``name`` is unknown. The message lists the
                registered names so a typo in a config file is easy to spot.
        """
        try:
            return self._registry[name]
        except KeyError:
            available = ", ".join(repr(key) for key in self._registry) or "<none>"
            raise ValueError(
                f"Item {name} not found in registry. Available items: {available}"
            ) from None

    def list_items(self):
        """Return the registered names as a new list, in registration order."""
        return list(self._registry)

    def __contains__(self, name):
        """Support ``name in registry``."""
        return name in self._registry

    def __len__(self):
        """Return the number of registered items."""
        return len(self._registry)

    def __repr__(self):
        return f"{type(self).__name__}({self.list_items()!r})"


# Create global registries for tasks and models
TASK_REGISTRY = Registry()
MODEL_REGISTRY = Registry()
from pdf_extract_kit.tasks.formula_detection.models.yolo import FormulaDetectionYOLO

from pdf_extract_kit.registry.registry import MODEL_REGISTRY

# Names exported by `from pdf_extract_kit.tasks.formula_detection import *`.
# Every entry must match a name bound in this module: the previous entry was
# misspelled ("Furmula..."), which makes a star-import raise AttributeError.
__all__ = [
    "FormulaDetectionYOLO",
]
from pdf_extract_kit.tasks.formula_recognition.models.unimernet import FormulaRecognitionUniMERNet

from pdf_extract_kit.registry.registry import MODEL_REGISTRY

# Names exported by `from pdf_extract_kit.tasks.formula_recognition import *`.
# Every entry must match a name bound in this module: the previous entry was
# misspelled ("Furmula..."), which makes a star-import raise AttributeError.
__all__ = [
    "FormulaRecognitionUniMERNet",
]
/pdf_extract_kit/tasks/layout_detection/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatalab/PDF-Extract-Kit/fdb25fd4bd9058ba4e13ac16cb68d4f06b23df56/pdf_extract_kit/tasks/layout_detection/models/__init__.py -------------------------------------------------------------------------------- /pdf_extract_kit/tasks/layout_detection/models/layoutlmv3_util/layoutlmft/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import ( 2 | LayoutLMv3Config, 3 | LayoutLMv3ForTokenClassification, 4 | LayoutLMv3ForQuestionAnswering, 5 | LayoutLMv3ForSequenceClassification, 6 | LayoutLMv3Tokenizer, 7 | ) 8 | -------------------------------------------------------------------------------- /pdf_extract_kit/tasks/layout_detection/models/layoutlmv3_util/layoutlmft/data/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .data_collator import DataCollatorForKeyValueExtraction 3 | -------------------------------------------------------------------------------- /pdf_extract_kit/tasks/layout_detection/models/layoutlmv3_util/layoutlmft/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .layoutlmv3 import ( 2 | LayoutLMv3Config, 3 | LayoutLMv3ForTokenClassification, 4 | LayoutLMv3ForQuestionAnswering, 5 | LayoutLMv3ForSequenceClassification, 6 | LayoutLMv3Tokenizer, 7 | ) 8 | -------------------------------------------------------------------------------- /pdf_extract_kit/tasks/layout_detection/models/layoutlmv3_util/layoutlmft/models/layoutlmv3/__init__.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoConfig, AutoModel, AutoModelForTokenClassification, \ 2 | AutoModelForQuestionAnswering, AutoModelForSequenceClassification, AutoTokenizer 3 | from 
class LayoutLMv3Config(BertConfig):
    # Identifier string for this configuration class.
    model_type = "layoutlmv3"

    def __init__(
        self,
        pad_token_id=1,
        bos_token_id=0,
        eos_token_id=2,
        max_2d_position_embeddings=1024,
        coordinate_size=None,
        shape_size=None,
        has_relative_attention_bias=False,
        rel_pos_bins=32,
        max_rel_pos=128,
        has_spatial_attention_bias=False,
        rel_2d_pos_bins=64,
        max_rel_2d_pos=256,
        visual_embed=True,
        mim=False,
        wpa_task=False,
        discrete_vae_weight_path='',
        discrete_vae_type='dall-e',
        input_size=224,
        second_input_size=112,
        device='cuda',
        **kwargs
    ):
        """Construct a LayoutLMv3Config.

        ``pad_token_id``, ``bos_token_id``, ``eos_token_id`` and any extra
        ``**kwargs`` are forwarded to ``BertConfig.__init__``. Every other
        argument is stored unchanged as an instance attribute of the same
        name.

        NOTE(review): the meaning of the individual fields is defined by the
        model code that reads this config, which is not part of this module.
        """
        super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
        # LayoutLMv3-specific settings, copied verbatim from the arguments.
        self.max_2d_position_embeddings = max_2d_position_embeddings
        self.coordinate_size = coordinate_size
        self.shape_size = shape_size
        self.has_relative_attention_bias = has_relative_attention_bias
        self.rel_pos_bins = rel_pos_bins
        self.max_rel_pos = max_rel_pos
        self.has_spatial_attention_bias = has_spatial_attention_bias
        self.rel_2d_pos_bins = rel_2d_pos_bins
        self.max_rel_2d_pos = max_rel_2d_pos
        self.visual_embed = visual_embed
        self.mim = mim
        self.wpa_task = wpa_task
        self.discrete_vae_weight_path = discrete_vae_weight_path
        self.discrete_vae_type = discrete_vae_type
        self.input_size = input_size
        self.second_input_size = second_input_size
        # Defaults to 'cuda'; pass another value explicitly when needed.
        self.device = device
# File names exposed through the tokenizer's `vocab_files_names` attribute.
VOCAB_FILES_NAMES = {
    "vocab_file": "vocab.json",
    "merges_file": "merges.txt",
}

class LayoutLMv3Tokenizer(RobertaTokenizer):
    """LayoutLMv3 tokenizer: a ``RobertaTokenizer`` subclass that only sets class attributes."""
    vocab_files_names = VOCAB_FILES_NAMES
    # The next two assignments are left disabled: neither constant is defined
    # in this module.
    # pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    # max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]
@TASK_REGISTRY.register("layout_detection")
class LayoutDetectionTask(BaseTask):
    """Layout detection task: loads the inputs and delegates to ``self.model.predict``."""

    def __init__(self, model):
        super().__init__(model)

    def predict_images(self, input_data, result_path):
        """
        Run layout detection on image files.

        Args:
            input_data (str): Path to a single image file or a directory containing image files.
            result_path (str): Path to save the prediction results.

        Returns:
            list: List of prediction results.
        """
        loaded = self.load_images(input_data)
        predictions = self.model.predict(loaded, result_path)
        return predictions

    def predict_pdfs(self, input_data, result_path):
        """
        Run layout detection on the pages of PDF files.

        Args:
            input_data (str): Path to a single PDF file or a directory containing PDF files.
            result_path (str): Path to save the prediction results.

        Returns:
            list: List of prediction results.
        """
        # `load_pdf_images` returns a mapping; its values are passed to the
        # model as the images and its keys as the third positional argument.
        pages = self.load_pdf_images(input_data)
        page_images = list(pages.values())
        page_keys = list(pages.keys())
        return self.model.predict(page_images, result_path, page_keys)
@MODEL_REGISTRY.register("table_parsing_struct_eqtable")
class TableParsingStructEqTable:
    """Table parsing model backed by ``struct_eqtable.build_model``."""

    # Output formats accepted by `predict`.
    SUPPORTED_FORMATS = ('latex', 'markdown', 'html')

    def __init__(self, config):
        """
        Initialize the TableParsingStructEqTable class.

        Args:
            config (dict): Configuration dictionary containing model parameters.
                Recognised keys (all optional): ``model_path``,
                ``max_new_tokens``, ``max_time``, ``lmdeploy``,
                ``flash_attn``, ``batch_size`` and ``output_format``.
        """
        # NOTE: kept as an assert so the raised type stays AssertionError;
        # be aware that the check is skipped when Python runs with -O.
        assert torch.cuda.is_available(), "CUDA must be available for StructEqTable model."

        self.model_dir = config.get('model_path', 'U4R/StructTable-InternVL2-1B')
        self.max_new_tokens = config.get('max_new_tokens', 1024)
        self.max_time = config.get('max_time', 30)

        self.lmdeploy = config.get('lmdeploy', False)
        self.flash_attn = config.get('flash_attn', True)
        self.batch_size = config.get('batch_size', 1)
        self.default_format = config.get('output_format', 'latex')

        # Load the StructEqTable model
        self.model = build_model(
            model_ckpt=self.model_dir,
            max_new_tokens=self.max_new_tokens,
            max_time=self.max_time,
            lmdeploy=self.lmdeploy,
            flash_attn=self.flash_attn,
            batch_size=self.batch_size,
        ).cuda()

    def predict(self, images, result_path, output_format=None, **kwargs):
        """
        Parse table images into the requested markup format.

        Args:
            images (list): Paths (or file objects) of the table images, each
                opened with ``PIL.Image.open``.
            result_path (str): Accepted for interface compatibility; not used
                by this method.
            output_format (str, optional): One of ``SUPPORTED_FORMATS``.
                Defaults to the ``output_format`` value from the config.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            Whatever the underlying model call returns for the image batch.

        Raises:
            ValueError: If the effective output format is not supported. This
                now also covers a bad default coming from the config.
        """
        if output_format is None:
            output_format = self.default_format
        # Validate before touching any file so a bad format fails fast.
        if output_format not in self.SUPPORTED_FORMATS:
            raise ValueError(f"Output format {output_format} is not supported.")

        load_images = []
        for image_path in images:
            # Image.open is lazy: force the pixel data to load, then let the
            # context manager release the file handle. The image stays usable.
            with Image.open(image_path) as image:
                image.load()
            load_images.append(image)

        results = self.model(
            load_images, output_format=output_format
        )

        return results
def load_pdf_page(page, dpi):
    """Render one PDF page to an RGB ``PIL.Image``.

    The page is rasterised with a zoom factor of ``dpi / 72`` on both axes.
    If either side of the result is larger than 3000 pixels, the page is
    rendered again at zoom 1 to keep the image size bounded.

    Args:
        page: A PyMuPDF page object (anything offering ``get_pixmap``).
        dpi: Target resolution used to derive the zoom factor.

    Returns:
        PIL.Image.Image: The rendered page in RGB mode.
    """
    zoom = dpi / 72
    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
    if pix.width > 3000 or pix.height > 3000:
        # Too large: fall back to the unscaled rendering.
        pix = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
    # Build the PIL image once, from whichever pixmap was kept.
    return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)


def load_pdf(pdf_path, dpi=144):
    """Render every page of a PDF file to a PIL image.

    Args:
        pdf_path: Path of the PDF file to open.
        dpi: Resolution passed to ``load_pdf_page`` for each page.

    Returns:
        list: One RGB ``PIL.Image`` per page, in page order.
    """
    # The images are built from copied pixel bytes, so the document can be
    # closed as soon as all pages are rendered.
    with fitz.open(pdf_path) as doc:
        return [load_pdf_page(page, dpi) for page in doc]
__version__ = '0.1.0'
short_version = __version__


def parse_version_info(version_str: str) -> Tuple:
    """Parse version from a string.

    Purely numeric components become ``int``. A component containing ``rc``
    (for example ``0rc1``) contributes the number before ``rc`` as an ``int``
    followed by the string ``'rc<suffix>'``. A component that starts with
    ``rc`` (for example the last part of ``1.0.rc1``) contributes only the
    ``'rc<suffix>'`` string. Any other component is ignored.

    Args:
        version_str (str): A string represents a version info.

    Returns:
        tuple: A sequence of integer and string represents version.

    Raises:
        ValueError: If the text before ``rc`` is non-empty and not an integer.

    Example:
        >>> parse_version_info('2.0.0rc1')
        (2, 0, 0, 'rc1')
    """
    _version_info = []
    for part in version_str.split('.'):
        if part.isdigit():
            _version_info.append(int(part))
        elif 'rc' in part:
            patch_version = part.split('rc')
            # "rc1" has nothing before the marker; int('') would raise.
            if patch_version[0]:
                _version_info.append(int(patch_version[0]))
            _version_info.append(f'rc{patch_version[1]}')
    return tuple(_version_info)


version_info = parse_version_info(__version__)
Extract PDF features with the following tasks: 9 | 10 | - Layout Detection: Using the YOLOv8 model for region detection, such as images, tables, titles, text, etc.; 11 | 12 | - Formula Detection: Using YOLOv8 for detecting formulas, including inline formulas and isolated formulas; 13 | 14 | - Formula Recognition: Using UniMERNet for formula recognition; 15 | 16 | - Table Recognition: Using StructEqTable for table recognition; 17 | 18 | - Optical Character Recognition: Using PaddleOCR for text recognition; 19 | 20 | 2. Convert the features to a markdown file: 21 | 22 | Using simple rules to convert the recognition results to markdown (*Note: this is a simple conversion script that only supports single-column PDFs; see [MinerU](https://github.com/opendatalab/MinerU) for more complex layouts*). 23 | 24 | 25 | # Usage 26 | 27 | ``` 28 | python project/pdf2markdown/scripts/run_project.py --config project/pdf2markdown/configs/pdf2markdown.yaml 29 | ``` 30 | -------------------------------------------------------------------------------- /project/pdf2markdown/configs/pdf2markdown.yaml: -------------------------------------------------------------------------------- 1 | inputs: assets/demo/formula_detection 2 | outputs: outputs/pdf2markdown 3 | visualize: True 4 | merge2markdown: True 5 | tasks: 6 | layout_detection: 7 | model: layout_detection_yolo 8 | model_config: 9 | img_size: 1024 10 | conf_thres: 0.25 11 | iou_thres: 0.45 12 | model_path: models/Layout/YOLO/doclayout_yolo_ft.pt 13 | formula_detection: 14 | model: formula_detection_yolo 15 | model_config: 16 | img_size: 1280 17 | conf_thres: 0.25 18 | iou_thres: 0.45 19 | batch_size: 1 20 | model_path: models/MFD/YOLO/yolo_v8_ft.pt 21 | formula_recognition: 22 | model: formula_recognition_unimernet 23 | model_config: 24 | batch_size: 128 25 | cfg_path: pdf_extract_kit/configs/unimernet.yaml 26 | model_path: models/MFR/unimernet_tiny 27 | ocr: 28 | model: ocr_ppocr 29 | model_config: 30 | lang: ch 31 | show_log: True 32 | det_model_dir:
def parse_args():
    """Parse the command line of the pdf2markdown runner.

    Returns:
        argparse.Namespace: Namespace whose ``config`` attribute holds the
        value of the mandatory ``--config`` option.
    """
    cli = argparse.ArgumentParser(
        description="Run a task with a given configuration file.",
    )
    cli.add_argument(
        '--config',
        required=True,
        type=str,
        help='Path to the configuration file.',
    )
    namespace = cli.parse_args()
    return namespace
[build-system]
# The [project] table below (PEP 621 metadata) needs setuptools 61 or newer.
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pdf-extract-kit"
version = "0.1.0"
authors = [
  { name="Bin Wang", email="ictwangbin@gmail.com" }
]
description = "A Comprehensive Toolkit for High-Quality PDF Content Extraction."
readme = "README.md"
# The license text lives in LICENSE.md at the repository root.
license = { file="LICENSE.md" }
requires-python = ">=3.10"
dependencies = [
    "PyPDF2",
    "matplotlib",
    "pyyaml",
    "frontend",
    "pymupdf",
    # Array items must be PEP 508 strings; the former Poetry-style
    # `opencv-python = "^4.6.0"` entry was not valid TOML inside an array.
    "opencv-python>=4.6.0,<5.0.0",
    # Add other common dependencies
]

[project.optional-dependencies]
layout_detection = [
    "transformers",  # for layoutlmv3
    # Add other dependencies for layout detection
]
formula_detection = [
    "ultralytics",  # for yolov8
    # Add other dependencies for formula detection
]
# Add additional dependencies for other models
from pdf_extract_kit.utils.config_loader import load_config, initialize_tasks_and_models 8 | import pdf_extract_kit.tasks 9 | 10 | TASK_NAME = 'formula_detection' 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="Run a task with a given configuration file.") 15 | parser.add_argument('--config', type=str, required=True, help='Path to the configuration file.') 16 | return parser.parse_args() 17 | 18 | def main(config_path): 19 | config = load_config(config_path) 20 | task_instances = initialize_tasks_and_models(config) 21 | 22 | # get input and output paths from config (outputs defaults to 'outputs/' + TASK_NAME) 23 | input_data = config.get('inputs', None) 24 | result_path = config.get('outputs', 'outputs'+'/'+TASK_NAME) 25 | 26 | # formula_detection_task: look up the task instance built from the config 27 | model_formula_detection = task_instances[TASK_NAME] 28 | 29 | # for image detection 30 | detection_results = model_formula_detection.predict_images(input_data, result_path) 31 | 32 | # for pdf detection (alternative call, disabled by default) 33 | # detection_results = model_formula_detection.predict_pdfs(input_data, result_path) 34 | 35 | # print(detection_results) 36 | print(f'The predicted results can be found at {result_path}') 37 | 38 | 39 | if __name__ == "__main__": 40 | args = parse_args() 41 | main(args.config) 42 | -------------------------------------------------------------------------------- /scripts/formula_recognition.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import os.path as osp 4 | import argparse 5 | 6 | sys.path.append(osp.join(os.path.dirname(os.path.abspath(__file__)), '..')) 7 | from pdf_extract_kit.utils.config_loader import load_config, initialize_tasks_and_models 8 | import pdf_extract_kit.tasks 9 | 10 | TASK_NAME = 'formula_recognition' 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="Run a task with a given configuration file.") 15 | parser.add_argument('--config', type=str, required=True, help='Path to the configuration 
file.') 16 | return parser.parse_args() 17 | 18 | def main(config_path): 19 | config = load_config(config_path) 20 | task_instances = initialize_tasks_and_models(config) 21 | 22 | # get input and output path from config 23 | input_data = config.get('inputs', None) 24 | result_path = config.get('outputs', 'outputs'+'/'+TASK_NAME) 25 | 26 | # formula_recognition_task 27 | model_formula_recognition = task_instances[TASK_NAME] 28 | 29 | # run formula recognition on the inputs 30 | recognition_results = model_formula_recognition.predict(input_data, result_path) 31 | 32 | 33 | print('Recognition results are as follows:') 34 | for id, math in enumerate(recognition_results): 35 | print(str(id+1)+': ', math) 36 | 37 | 38 | if __name__ == "__main__": 39 | args = parse_args() 40 | main(args.config) 41 | -------------------------------------------------------------------------------- /scripts/layout_detection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import os.path as osp 4 | import argparse 5 | 6 | sys.path.append(osp.join(os.path.dirname(os.path.abspath(__file__)), '..')) 7 | from pdf_extract_kit.utils.config_loader import load_config, initialize_tasks_and_models 8 | import pdf_extract_kit.tasks 9 | 10 | TASK_NAME = 'layout_detection' 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="Run a task with a given configuration file.") 15 | parser.add_argument('--config', type=str, required=True, help='Path to the configuration file.') 16 | return parser.parse_args() 17 | 18 | def main(config_path): 19 | config = load_config(config_path) 20 | task_instances = initialize_tasks_and_models(config) 21 | 22 | # get input and output path from config 23 | input_data = config.get('inputs', None) 24 | result_path = config.get('outputs', 'outputs'+'/'+TASK_NAME) 25 | 26 | # layout_detection_task 27 | model_layout_detection = task_instances[TASK_NAME] 28 | 29 | # for image detection 30 | 
detection_results = model_layout_detection.predict_images(input_data, result_path) 31 | 32 | # for pdf detection 33 | # detection_results = model_layout_detection.predict_pdfs(input_data, result_path) 34 | 35 | # print(detection_results) 36 | print(f'The predicted results can be found at {result_path}') 37 | 38 | 39 | if __name__ == "__main__": 40 | args = parse_args() 41 | main(args.config) 42 | -------------------------------------------------------------------------------- /scripts/ocr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import os.path as osp 4 | import argparse 5 | 6 | sys.path.append(osp.join(os.path.dirname(os.path.abspath(__file__)), '..')) 7 | from pdf_extract_kit.utils.config_loader import load_config, initialize_tasks_and_models 8 | import pdf_extract_kit.tasks 9 | 10 | TASK_NAME = 'ocr' 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="Run a task with a given configuration file.") 15 | parser.add_argument('--config', type=str, required=True, help='Path to the configuration file.') 16 | return parser.parse_args() 17 | 18 | def main(config_path): 19 | config = load_config(config_path) 20 | task_instances = initialize_tasks_and_models(config) 21 | 22 | # get input/output paths and the visualize flag from config 23 | input_data = config.get('inputs', None) 24 | result_path = config.get('outputs', 'outputs'+'/'+TASK_NAME) 25 | visualize = config.get('visualize', False) 26 | 27 | # ocr_task 28 | task = task_instances[TASK_NAME] 29 | 30 | detection_results = task.process(input_data, save_dir=result_path, visualize=visualize) 31 | 32 | print(f'Task done, results can be found at {result_path}') 33 | 34 | if __name__ == "__main__": 35 | args = parse_args() 36 | main(args.config) 37 | -------------------------------------------------------------------------------- /scripts/run_task.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import os.path as osp 4 | import argparse 5 | 6 | sys.path.append(osp.join(os.path.dirname(os.path.abspath(__file__)), '..')) 7 | from pdf_extract_kit.utils.config_loader import load_config, initialize_tasks_and_models 8 | import pdf_extract_kit.tasks # ensure all task modules are imported 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description="Run a task with a given configuration file.") 13 | parser.add_argument('--config', type=str, required=True, help='Path to the configuration file.') 14 | return parser.parse_args() 15 | 16 | def main(config_path): 17 | config = load_config(config_path) 18 | task_instances = initialize_tasks_and_models(config) 19 | 20 | # get the input and output paths from the config file 21 | input_data = config.get('inputs', None) 22 | result_path = config.get('outputs', 'outputs') 23 | 24 | # formula_detection_task -- NOTE(review): scripts/formula_detection.py calls predict_images() instead of predict(); confirm predict() exists on this task 25 | model_formula_detection = task_instances['formula_detection'] 26 | detection_results = model_formula_detection.predict(input_data, result_path) 27 | print(detection_results) 28 | 29 | # formula_recognition_task (disabled) 30 | # model_formula_recognition = task_instances['formula_recognition'] 31 | # recognition_results = model_formula_recognition.predict(input_data, result_path) 32 | 33 | # for id, math in enumerate(recognition_results): 34 | # print(str(id+1)+': ', math) 35 | 36 | # results = task_instance.run(input_data) 37 | # print(results) 38 | 39 | if __name__ == "__main__": 40 | args = parse_args() 41 | main(args.config) 42 | -------------------------------------------------------------------------------- /scripts/table_parsing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import os.path as osp 4 | import argparse 5 | 6 | sys.path.append(osp.join(os.path.dirname(os.path.abspath(__file__)), '..')) 7 | from pdf_extract_kit.utils.config_loader import load_config, 
initialize_tasks_and_models 8 | import pdf_extract_kit.tasks 9 | 10 | TASK_NAME = 'table_parsing' 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="Run a task with a given configuration file.") 15 | parser.add_argument('--config', type=str, required=True, help='Path to the configuration file.') 16 | return parser.parse_args() 17 | 18 | def main(config_path): 19 | config = load_config(config_path) 20 | task_instances = initialize_tasks_and_models(config) 21 | 22 | # get input and output path from config 23 | input_data = config.get('inputs', None) 24 | result_path = config.get('outputs', 'outputs'+'/'+TASK_NAME) 25 | 26 | # table_parsing_task 27 | model_table_parsing = task_instances[TASK_NAME] 28 | 29 | # run table parsing on the inputs 30 | parsing_results = model_table_parsing.predict(input_data, result_path) 31 | 32 | 33 | print('Table Parsing results are as follows:') 34 | for id, result in enumerate(parsing_results): 35 | print(str(id+1)+':\n', result) 36 | 37 | 38 | if __name__ == "__main__": 39 | args = parse_args() 40 | main(args.config) 41 | --------------------------------------------------------------------------------