├── .github ├── ISSUE_TEMPLATE │ ├── breaking-bug-report.md │ ├── feature_request.md │ └── output-bug-report.md └── workflows │ ├── benchmarks.yml │ ├── ci.yml │ ├── cla.yml │ ├── publish.yml │ └── scripts.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CLA.md ├── LICENSE ├── MODEL_LICENSE ├── README.md ├── benchmarks ├── __init__.py ├── overall │ ├── __init__.py │ ├── display │ │ ├── __init__.py │ │ ├── dataset.py │ │ └── table.py │ ├── download │ │ ├── __init__.py │ │ ├── base.py │ │ ├── llamaparse.py │ │ ├── main.py │ │ ├── mathpix.py │ │ └── mistral.py │ ├── elo.py │ ├── methods │ │ ├── __init__.py │ │ ├── docling.py │ │ ├── gt.py │ │ ├── llamaparse.py │ │ ├── marker.py │ │ ├── mathpix.py │ │ ├── mistral.py │ │ ├── olmocr.py │ │ └── schema.py │ ├── overall.py │ ├── registry.py │ ├── schema.py │ └── scorers │ │ ├── __init__.py │ │ ├── clean.py │ │ ├── heuristic.py │ │ ├── llm.py │ │ └── schema.py ├── table │ ├── __init__.py │ ├── gemini.py │ ├── inference.py │ ├── scoring.py │ └── table.py ├── throughput │ ├── __init__.py │ └── main.py └── verify_scores.py ├── chunk_convert.py ├── convert.py ├── convert_single.py ├── data ├── .gitignore ├── examples │ ├── json │ │ ├── multicolcnn.json │ │ ├── switch_trans.json │ │ └── thinkpython.json │ └── markdown │ │ ├── multicolcnn │ │ ├── _page_1_Figure_0.jpeg │ │ ├── _page_2_Picture_0.jpeg │ │ ├── _page_6_Figure_0.jpeg │ │ ├── _page_7_Figure_0.jpeg │ │ ├── multicolcnn.md │ │ └── multicolcnn_meta.json │ │ ├── switch_transformers │ │ ├── _page_11_Figure_4.jpeg │ │ ├── _page_12_Figure_4.jpeg │ │ ├── _page_13_Figure_2.jpeg │ │ ├── _page_18_Figure_1.jpeg │ │ ├── _page_18_Figure_3.jpeg │ │ ├── _page_20_Figure_1.jpeg │ │ ├── _page_20_Figure_4.jpeg │ │ ├── _page_27_Figure_1.jpeg │ │ ├── _page_29_Figure_1.jpeg │ │ ├── _page_2_Figure_3.jpeg │ │ ├── _page_30_Figure_1.jpeg │ │ ├── _page_31_Figure_3.jpeg │ │ ├── _page_4_Figure_1.jpeg │ │ ├── _page_5_Figure_3.jpeg │ │ ├── switch_trans.md │ │ └── switch_trans_meta.json │ │ └── thinkpython │ │ ├── _page_109_Figure_1.jpeg │ │ ├── _page_115_Figure_1.jpeg │ │ ├── _page_116_Figure_3.jpeg │ │ ├── _page_127_Figure_1.jpeg │ │ ├── _page_128_Figure_1.jpeg │ │ ├── _page_167_Figure_1.jpeg │ │ ├── _page_169_Figure_1.jpeg │ │ ├── _page_173_Figure_1.jpeg │ │ ├── _page_190_Figure_1.jpeg │ │ ├── _page_195_Figure_1.jpeg │ │ ├── _page_205_Figure_1.jpeg │ │ ├── _page_230_Figure_1.jpeg │ │ ├── _page_233_Figure_1.jpeg │ │ ├── _page_233_Figure_3.jpeg │ │ ├── _page_234_Figure_1.jpeg │ │ ├── _page_235_Figure_1.jpeg │ │ ├── _page_236_Figure_1.jpeg │ │ ├── _page_236_Figure_3.jpeg │ │ ├── _page_237_Figure_1.jpeg │ │ ├── _page_238_Figure_1.jpeg │ │ ├── _page_23_Figure_1.jpeg │ │ ├── _page_23_Figure_3.jpeg │ │ ├── _page_46_Figure_1.jpeg │ │ ├── _page_60_Figure_1.jpeg │ │ ├── _page_60_Figure_3.jpeg │ │ ├── _page_67_Figure_1.jpeg │ │ ├── _page_71_Figure_1.jpeg │ │ ├── _page_78_Figure_1.jpeg │ │ ├── _page_85_Figure_1.jpeg │ │ ├── _page_94_Figure_1.jpeg │ │ ├── _page_99_Figure_17.jpeg │ │ ├── _page_99_Figure_178.jpeg │ │ ├── thinkpython.md │ │ └── thinkpython_meta.json ├── images │ ├── overall.png │ ├── per_doc.png │ └── table.png └── latex_to_md.sh ├── examples ├── README.md └── marker_modal_deployment.py ├── extraction_app.py ├── marker ├── builders │ ├── __init__.py │ ├── document.py │ ├── layout.py │ ├── line.py │ ├── ocr.py │ └── structure.py ├── config │ ├── __init__.py │ ├── crawler.py │ ├── parser.py │ └── printer.py ├── converters │ ├── __init__.py │ ├── extraction.py │ ├── ocr.py │ ├── pdf.py │ └── table.py ├── extractors │ ├── __init__.py │ ├── document.py │ └── page.py ├── logger.py ├── models.py ├── output.py ├── processors │ ├── __init__.py │ ├── blank_page.py │ ├── block_relabel.py │ ├── blockquote.py │ ├── code.py │ ├── debug.py │ ├── document_toc.py │ ├── equation.py │ ├── footnote.py │ ├── ignoretext.py │ ├── line_merge.py │ ├── line_numbers.py │ ├── list.py │ ├── llm │ │ ├── __init__.py │ │ ├── llm_complex.py │ │ ├── llm_equation.py │ │ ├── llm_form.py │ │ ├── llm_handwriting.py │ │ ├── llm_image_description.py │ │ ├── llm_mathblock.py │ │ ├── llm_meta.py │ │ ├── llm_page_correction.py │ │ ├── llm_sectionheader.py │ │ ├── llm_table.py │ │ └── llm_table_merge.py │ ├── order.py │ ├── page_header.py │ ├── reference.py │ ├── sectionheader.py │ ├── table.py │ ├── text.py │ └── util.py ├── providers │ ├── __init__.py │ ├── document.py │ ├── epub.py │ ├── html.py │ ├── image.py │ ├── pdf.py │ ├── powerpoint.py │ ├── registry.py │ ├── spreadsheet.py │ └── utils.py ├── renderers │ ├── __init__.py │ ├── chunk.py │ ├── extraction.py │ ├── html.py │ ├── json.py │ ├── markdown.py │ └── ocr_json.py ├── schema │ ├── __init__.py │ ├── blocks │ │ ├── __init__.py │ │ ├── base.py │ │ ├── basetable.py │ │ ├── caption.py │ │ ├── code.py │ │ ├── complexregion.py │ │ ├── equation.py │ │ ├── figure.py │ │ ├── footnote.py │ │ ├── form.py │ │ ├── handwriting.py │ │ ├── inlinemath.py │ │ ├── listitem.py │ │ ├── pagefooter.py │ │ ├── pageheader.py │ │ ├── picture.py │ │ ├── reference.py │ │ ├── sectionheader.py │ │ ├── table.py │ │ ├── tablecell.py │ │ ├── text.py │ │ └── toc.py │ ├── document.py │ ├── groups │ │ ├── __init__.py │ │ ├── base.py │ │ ├── figure.py │ │ ├── list.py │ │ ├── page.py │ │ ├── picture.py │ │ └── table.py │ ├── polygon.py │ ├── registry.py │ └── text │ │ ├── __init__.py │ │ ├── char.py │ │ ├── line.py │ │ └── span.py ├── scripts │ ├── __init__.py │ ├── chunk_convert.py │ ├── chunk_convert.sh │ ├── common.py │ ├── convert.py │ ├── convert_single.py │ ├── extraction_app.py │ ├── file_to_s3.py │ ├── run_streamlit_app.py │ ├── server.py │ └── streamlit_app.py ├── services │ ├── __init__.py │ ├── azure_openai.py │ ├── claude.py │ ├── gemini.py │ ├── ollama.py │ ├── openai.py │ └── vertex.py ├── settings.py ├── util.py └── utils │ ├── __init__.py │ ├── batch.py │ ├── gpu.py │ └── image.py ├── marker_app.py ├── marker_server.py ├── poetry.lock ├── pyproject.toml ├── pytest.ini ├── signatures └── version1 │ └── cla.json ├── static └── fonts │ └── .gitignore └── tests ├── builders ├── test_blank_page.py ├── test_document_builder.py ├── test_garbled_pdf.py ├── test_layout_replace.py ├── test_ocr_builder.py ├── test_ocr_pipeline.py ├── test_overriding.py ├── test_pdf_links.py ├── test_rotated_bboxes.py ├── test_strip_existing_ocr.py └── test_structure.py ├── config └── test_config.py ├── conftest.py ├── converters ├── test_extraction_converter.py ├── test_ocr_converter.py ├── test_pdf_converter.py └── test_table_converter.py ├── processors ├── test_document_toc_processor.py ├── test_equation_processor.py ├── test_footnote_processor.py ├── test_ignoretext.py ├── test_llm_processors.py ├── test_table_merge.py └── test_table_processor.py ├── providers ├── test_document_providers.py ├── test_image_provider.py └── test_pdf_provider.py ├── renderers ├── test_chunk_renderer.py ├── test_extract_images.py ├── test_html_renderer.py ├── test_json_renderer.py └── test_markdown_renderer.py ├── schema └── groups │ └── test_list_grouping.py ├── services └── test_service_init.py └── utils.py /.github/ISSUE_TEMPLATE/breaking-bug-report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.github/ISSUE_TEMPLATE/breaking-bug-report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/output-bug-report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.github/ISSUE_TEMPLATE/output-bug-report.md -------------------------------------------------------------------------------- /.github/workflows/benchmarks.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.github/workflows/benchmarks.yml -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/cla.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.github/workflows/cla.yml -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.github/workflows/scripts.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.github/workflows/scripts.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CLA.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/CLA.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/LICENSE -------------------------------------------------------------------------------- /MODEL_LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/MODEL_LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/overall/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/overall/display/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/overall/display/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/display/dataset.py -------------------------------------------------------------------------------- /benchmarks/overall/display/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/display/table.py -------------------------------------------------------------------------------- /benchmarks/overall/download/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/overall/download/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/download/base.py -------------------------------------------------------------------------------- /benchmarks/overall/download/llamaparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/download/llamaparse.py -------------------------------------------------------------------------------- /benchmarks/overall/download/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/download/main.py -------------------------------------------------------------------------------- /benchmarks/overall/download/mathpix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/download/mathpix.py -------------------------------------------------------------------------------- /benchmarks/overall/download/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/download/mistral.py -------------------------------------------------------------------------------- /benchmarks/overall/elo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/elo.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/__init__.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/docling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/docling.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/gt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/gt.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/llamaparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/llamaparse.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/marker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/marker.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/mathpix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/mathpix.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/mistral.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/olmocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/olmocr.py -------------------------------------------------------------------------------- /benchmarks/overall/methods/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/methods/schema.py -------------------------------------------------------------------------------- /benchmarks/overall/overall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/overall.py -------------------------------------------------------------------------------- /benchmarks/overall/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/registry.py -------------------------------------------------------------------------------- /benchmarks/overall/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/schema.py -------------------------------------------------------------------------------- /benchmarks/overall/scorers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/scorers/__init__.py -------------------------------------------------------------------------------- /benchmarks/overall/scorers/clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/scorers/clean.py -------------------------------------------------------------------------------- /benchmarks/overall/scorers/heuristic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/scorers/heuristic.py -------------------------------------------------------------------------------- /benchmarks/overall/scorers/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/scorers/llm.py -------------------------------------------------------------------------------- /benchmarks/overall/scorers/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/overall/scorers/schema.py -------------------------------------------------------------------------------- /benchmarks/table/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/table/gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/table/gemini.py -------------------------------------------------------------------------------- /benchmarks/table/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/table/inference.py -------------------------------------------------------------------------------- /benchmarks/table/scoring.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/table/scoring.py -------------------------------------------------------------------------------- /benchmarks/table/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/table/table.py -------------------------------------------------------------------------------- /benchmarks/throughput/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/throughput/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/throughput/main.py -------------------------------------------------------------------------------- /benchmarks/verify_scores.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/benchmarks/verify_scores.py -------------------------------------------------------------------------------- /chunk_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/chunk_convert.py -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/convert.py -------------------------------------------------------------------------------- /convert_single.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/convert_single.py -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | latex 2 | pdfs 3 | references -------------------------------------------------------------------------------- /data/examples/json/multicolcnn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/json/multicolcnn.json -------------------------------------------------------------------------------- /data/examples/json/switch_trans.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/json/switch_trans.json -------------------------------------------------------------------------------- /data/examples/json/thinkpython.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/json/thinkpython.json -------------------------------------------------------------------------------- /data/examples/markdown/multicolcnn/_page_1_Figure_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/multicolcnn/_page_1_Figure_0.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/multicolcnn/_page_2_Picture_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/multicolcnn/_page_2_Picture_0.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/multicolcnn/_page_6_Figure_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/multicolcnn/_page_6_Figure_0.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/multicolcnn/_page_7_Figure_0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/multicolcnn/_page_7_Figure_0.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/multicolcnn/multicolcnn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/multicolcnn/multicolcnn.md -------------------------------------------------------------------------------- /data/examples/markdown/multicolcnn/multicolcnn_meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/multicolcnn/multicolcnn_meta.json -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_11_Figure_4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_11_Figure_4.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_12_Figure_4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_12_Figure_4.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_13_Figure_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_13_Figure_2.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_18_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_18_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_18_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_18_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_20_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_20_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_20_Figure_4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_20_Figure_4.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_27_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_27_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_29_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_29_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_2_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_2_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_30_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_30_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_31_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_31_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_4_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_4_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/_page_5_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/_page_5_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/switch_trans.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/switch_trans.md -------------------------------------------------------------------------------- /data/examples/markdown/switch_transformers/switch_trans_meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/switch_transformers/switch_trans_meta.json -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_109_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_109_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_115_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_115_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_116_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_116_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_127_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_127_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_128_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_128_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_167_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_167_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_169_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_169_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_173_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_173_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_190_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_190_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_195_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_195_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_205_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_205_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_230_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_230_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_233_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_233_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_233_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_233_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_234_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_234_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_235_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_235_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_236_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_236_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_236_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_236_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_237_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_237_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_238_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_238_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_23_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_23_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_23_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_23_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_46_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_46_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_60_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_60_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_60_Figure_3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_60_Figure_3.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_67_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_67_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_71_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_71_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_78_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_78_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_85_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_85_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_94_Figure_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_94_Figure_1.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_99_Figure_17.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_99_Figure_17.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/_page_99_Figure_178.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/_page_99_Figure_178.jpeg -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/thinkpython.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/thinkpython.md -------------------------------------------------------------------------------- /data/examples/markdown/thinkpython/thinkpython_meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/examples/markdown/thinkpython/thinkpython_meta.json -------------------------------------------------------------------------------- /data/images/overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/images/overall.png -------------------------------------------------------------------------------- /data/images/per_doc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/images/per_doc.png -------------------------------------------------------------------------------- /data/images/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/images/table.png -------------------------------------------------------------------------------- /data/latex_to_md.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/data/latex_to_md.sh -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/examples/README.md -------------------------------------------------------------------------------- /examples/marker_modal_deployment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/examples/marker_modal_deployment.py -------------------------------------------------------------------------------- /extraction_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/extraction_app.py -------------------------------------------------------------------------------- /marker/builders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/builders/__init__.py -------------------------------------------------------------------------------- /marker/builders/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/builders/document.py -------------------------------------------------------------------------------- /marker/builders/layout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/builders/layout.py -------------------------------------------------------------------------------- /marker/builders/line.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/builders/line.py -------------------------------------------------------------------------------- /marker/builders/ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/builders/ocr.py -------------------------------------------------------------------------------- /marker/builders/structure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/builders/structure.py -------------------------------------------------------------------------------- /marker/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /marker/config/crawler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/config/crawler.py -------------------------------------------------------------------------------- /marker/config/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/config/parser.py -------------------------------------------------------------------------------- /marker/config/printer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/config/printer.py -------------------------------------------------------------------------------- /marker/converters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/converters/__init__.py -------------------------------------------------------------------------------- /marker/converters/extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/converters/extraction.py -------------------------------------------------------------------------------- /marker/converters/ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/converters/ocr.py -------------------------------------------------------------------------------- /marker/converters/pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/converters/pdf.py -------------------------------------------------------------------------------- /marker/converters/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/converters/table.py -------------------------------------------------------------------------------- /marker/extractors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/extractors/__init__.py -------------------------------------------------------------------------------- /marker/extractors/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/extractors/document.py -------------------------------------------------------------------------------- /marker/extractors/page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/extractors/page.py -------------------------------------------------------------------------------- /marker/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/logger.py -------------------------------------------------------------------------------- /marker/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/models.py -------------------------------------------------------------------------------- /marker/output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/output.py -------------------------------------------------------------------------------- /marker/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/__init__.py -------------------------------------------------------------------------------- /marker/processors/blank_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/blank_page.py -------------------------------------------------------------------------------- /marker/processors/block_relabel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/block_relabel.py -------------------------------------------------------------------------------- /marker/processors/blockquote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/blockquote.py -------------------------------------------------------------------------------- /marker/processors/code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/code.py -------------------------------------------------------------------------------- /marker/processors/debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/debug.py -------------------------------------------------------------------------------- /marker/processors/document_toc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/document_toc.py -------------------------------------------------------------------------------- /marker/processors/equation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/equation.py -------------------------------------------------------------------------------- /marker/processors/footnote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/footnote.py -------------------------------------------------------------------------------- /marker/processors/ignoretext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/ignoretext.py -------------------------------------------------------------------------------- /marker/processors/line_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/line_merge.py -------------------------------------------------------------------------------- /marker/processors/line_numbers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/line_numbers.py -------------------------------------------------------------------------------- /marker/processors/list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/list.py -------------------------------------------------------------------------------- /marker/processors/llm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/__init__.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_complex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_complex.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_equation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_equation.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_form.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_form.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_handwriting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_handwriting.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_image_description.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_image_description.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_mathblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_mathblock.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_meta.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_page_correction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_page_correction.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_sectionheader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_sectionheader.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_table.py -------------------------------------------------------------------------------- /marker/processors/llm/llm_table_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/llm/llm_table_merge.py -------------------------------------------------------------------------------- /marker/processors/order.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/order.py -------------------------------------------------------------------------------- /marker/processors/page_header.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/page_header.py -------------------------------------------------------------------------------- /marker/processors/reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/reference.py -------------------------------------------------------------------------------- /marker/processors/sectionheader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/sectionheader.py -------------------------------------------------------------------------------- /marker/processors/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/table.py -------------------------------------------------------------------------------- /marker/processors/text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/text.py -------------------------------------------------------------------------------- /marker/processors/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/processors/util.py -------------------------------------------------------------------------------- /marker/providers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/__init__.py -------------------------------------------------------------------------------- /marker/providers/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/document.py -------------------------------------------------------------------------------- /marker/providers/epub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/epub.py -------------------------------------------------------------------------------- /marker/providers/html.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/html.py -------------------------------------------------------------------------------- /marker/providers/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/image.py -------------------------------------------------------------------------------- /marker/providers/pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/pdf.py -------------------------------------------------------------------------------- /marker/providers/powerpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/powerpoint.py -------------------------------------------------------------------------------- /marker/providers/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/registry.py -------------------------------------------------------------------------------- /marker/providers/spreadsheet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/spreadsheet.py -------------------------------------------------------------------------------- /marker/providers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/providers/utils.py -------------------------------------------------------------------------------- /marker/renderers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/renderers/__init__.py -------------------------------------------------------------------------------- /marker/renderers/chunk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/renderers/chunk.py -------------------------------------------------------------------------------- /marker/renderers/extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/renderers/extraction.py -------------------------------------------------------------------------------- /marker/renderers/html.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/renderers/html.py -------------------------------------------------------------------------------- /marker/renderers/json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/renderers/json.py -------------------------------------------------------------------------------- /marker/renderers/markdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/renderers/markdown.py -------------------------------------------------------------------------------- /marker/renderers/ocr_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/renderers/ocr_json.py -------------------------------------------------------------------------------- /marker/schema/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/__init__.py -------------------------------------------------------------------------------- /marker/schema/blocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/__init__.py -------------------------------------------------------------------------------- /marker/schema/blocks/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/base.py -------------------------------------------------------------------------------- /marker/schema/blocks/basetable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/basetable.py -------------------------------------------------------------------------------- /marker/schema/blocks/caption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/caption.py -------------------------------------------------------------------------------- /marker/schema/blocks/code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/code.py -------------------------------------------------------------------------------- /marker/schema/blocks/complexregion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/complexregion.py -------------------------------------------------------------------------------- /marker/schema/blocks/equation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/equation.py -------------------------------------------------------------------------------- /marker/schema/blocks/figure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/figure.py -------------------------------------------------------------------------------- /marker/schema/blocks/footnote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/footnote.py -------------------------------------------------------------------------------- /marker/schema/blocks/form.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/form.py -------------------------------------------------------------------------------- /marker/schema/blocks/handwriting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/handwriting.py -------------------------------------------------------------------------------- /marker/schema/blocks/inlinemath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/inlinemath.py -------------------------------------------------------------------------------- /marker/schema/blocks/listitem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/listitem.py -------------------------------------------------------------------------------- /marker/schema/blocks/pagefooter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/pagefooter.py -------------------------------------------------------------------------------- /marker/schema/blocks/pageheader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/pageheader.py -------------------------------------------------------------------------------- /marker/schema/blocks/picture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/picture.py -------------------------------------------------------------------------------- /marker/schema/blocks/reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/reference.py -------------------------------------------------------------------------------- /marker/schema/blocks/sectionheader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/sectionheader.py -------------------------------------------------------------------------------- /marker/schema/blocks/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/table.py -------------------------------------------------------------------------------- /marker/schema/blocks/tablecell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/tablecell.py -------------------------------------------------------------------------------- /marker/schema/blocks/text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/text.py -------------------------------------------------------------------------------- /marker/schema/blocks/toc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/blocks/toc.py -------------------------------------------------------------------------------- /marker/schema/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/document.py -------------------------------------------------------------------------------- /marker/schema/groups/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/groups/__init__.py -------------------------------------------------------------------------------- /marker/schema/groups/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/groups/base.py -------------------------------------------------------------------------------- /marker/schema/groups/figure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/groups/figure.py -------------------------------------------------------------------------------- /marker/schema/groups/list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/groups/list.py -------------------------------------------------------------------------------- /marker/schema/groups/page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/groups/page.py -------------------------------------------------------------------------------- /marker/schema/groups/picture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/groups/picture.py -------------------------------------------------------------------------------- /marker/schema/groups/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/groups/table.py -------------------------------------------------------------------------------- /marker/schema/polygon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/polygon.py -------------------------------------------------------------------------------- /marker/schema/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/registry.py -------------------------------------------------------------------------------- /marker/schema/text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/text/__init__.py -------------------------------------------------------------------------------- /marker/schema/text/char.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/text/char.py -------------------------------------------------------------------------------- /marker/schema/text/line.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/text/line.py -------------------------------------------------------------------------------- /marker/schema/text/span.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/schema/text/span.py -------------------------------------------------------------------------------- /marker/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /marker/scripts/chunk_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/chunk_convert.py -------------------------------------------------------------------------------- /marker/scripts/chunk_convert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/chunk_convert.sh -------------------------------------------------------------------------------- /marker/scripts/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/common.py -------------------------------------------------------------------------------- /marker/scripts/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/convert.py -------------------------------------------------------------------------------- /marker/scripts/convert_single.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/convert_single.py -------------------------------------------------------------------------------- /marker/scripts/extraction_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/extraction_app.py -------------------------------------------------------------------------------- /marker/scripts/file_to_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/file_to_s3.py -------------------------------------------------------------------------------- /marker/scripts/run_streamlit_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/run_streamlit_app.py -------------------------------------------------------------------------------- /marker/scripts/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/server.py -------------------------------------------------------------------------------- /marker/scripts/streamlit_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/scripts/streamlit_app.py -------------------------------------------------------------------------------- /marker/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/services/__init__.py -------------------------------------------------------------------------------- /marker/services/azure_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/services/azure_openai.py -------------------------------------------------------------------------------- /marker/services/claude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/services/claude.py -------------------------------------------------------------------------------- /marker/services/gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/services/gemini.py -------------------------------------------------------------------------------- /marker/services/ollama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/services/ollama.py -------------------------------------------------------------------------------- /marker/services/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/services/openai.py -------------------------------------------------------------------------------- /marker/services/vertex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/services/vertex.py -------------------------------------------------------------------------------- /marker/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/settings.py -------------------------------------------------------------------------------- /marker/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/util.py -------------------------------------------------------------------------------- /marker/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /marker/utils/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/utils/batch.py -------------------------------------------------------------------------------- /marker/utils/gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/utils/gpu.py -------------------------------------------------------------------------------- /marker/utils/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker/utils/image.py -------------------------------------------------------------------------------- /marker_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker_app.py -------------------------------------------------------------------------------- /marker_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/marker_server.py -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/pyproject.toml -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/pytest.ini -------------------------------------------------------------------------------- /signatures/version1/cla.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/signatures/version1/cla.json -------------------------------------------------------------------------------- /static/fonts/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /tests/builders/test_blank_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_blank_page.py -------------------------------------------------------------------------------- /tests/builders/test_document_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_document_builder.py -------------------------------------------------------------------------------- /tests/builders/test_garbled_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_garbled_pdf.py -------------------------------------------------------------------------------- /tests/builders/test_layout_replace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_layout_replace.py -------------------------------------------------------------------------------- /tests/builders/test_ocr_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_ocr_builder.py -------------------------------------------------------------------------------- /tests/builders/test_ocr_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_ocr_pipeline.py -------------------------------------------------------------------------------- /tests/builders/test_overriding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_overriding.py -------------------------------------------------------------------------------- /tests/builders/test_pdf_links.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_pdf_links.py -------------------------------------------------------------------------------- /tests/builders/test_rotated_bboxes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_rotated_bboxes.py -------------------------------------------------------------------------------- /tests/builders/test_strip_existing_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_strip_existing_ocr.py -------------------------------------------------------------------------------- /tests/builders/test_structure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/builders/test_structure.py -------------------------------------------------------------------------------- /tests/config/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/config/test_config.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/converters/test_extraction_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/converters/test_extraction_converter.py -------------------------------------------------------------------------------- /tests/converters/test_ocr_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/converters/test_ocr_converter.py -------------------------------------------------------------------------------- /tests/converters/test_pdf_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/converters/test_pdf_converter.py -------------------------------------------------------------------------------- /tests/converters/test_table_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/converters/test_table_converter.py -------------------------------------------------------------------------------- /tests/processors/test_document_toc_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/processors/test_document_toc_processor.py -------------------------------------------------------------------------------- /tests/processors/test_equation_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/processors/test_equation_processor.py -------------------------------------------------------------------------------- /tests/processors/test_footnote_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/processors/test_footnote_processor.py -------------------------------------------------------------------------------- /tests/processors/test_ignoretext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/processors/test_ignoretext.py -------------------------------------------------------------------------------- /tests/processors/test_llm_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/processors/test_llm_processors.py -------------------------------------------------------------------------------- /tests/processors/test_table_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/processors/test_table_merge.py -------------------------------------------------------------------------------- /tests/processors/test_table_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/processors/test_table_processor.py -------------------------------------------------------------------------------- /tests/providers/test_document_providers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/providers/test_document_providers.py -------------------------------------------------------------------------------- /tests/providers/test_image_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/providers/test_image_provider.py -------------------------------------------------------------------------------- /tests/providers/test_pdf_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/providers/test_pdf_provider.py -------------------------------------------------------------------------------- /tests/renderers/test_chunk_renderer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/renderers/test_chunk_renderer.py -------------------------------------------------------------------------------- /tests/renderers/test_extract_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/renderers/test_extract_images.py -------------------------------------------------------------------------------- /tests/renderers/test_html_renderer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/renderers/test_html_renderer.py -------------------------------------------------------------------------------- /tests/renderers/test_json_renderer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/renderers/test_json_renderer.py -------------------------------------------------------------------------------- /tests/renderers/test_markdown_renderer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/renderers/test_markdown_renderer.py -------------------------------------------------------------------------------- /tests/schema/groups/test_list_grouping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/schema/groups/test_list_grouping.py -------------------------------------------------------------------------------- /tests/services/test_service_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/services/test_service_init.py -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalab-to/marker/HEAD/tests/utils.py --------------------------------------------------------------------------------