├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── claude.yml │ └── create_issue.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── examples └── ocr │ ├── engine.py │ ├── output │ └── .gitignore │ ├── requirements.txt │ └── validate_ocr_performance.py ├── img └── unstructured_logo.png ├── logger_config.yaml ├── pyproject.toml ├── pytest.ini ├── requirements ├── base.in ├── base.txt ├── constraints.in ├── dev.in ├── dev.txt ├── test.in └── test.txt ├── sample-docs ├── 2023-Jan-economic-outlook.pdf ├── IRS-form-1987.pdf ├── RGBA_image.png ├── Silent-Giant.pdf ├── design-thinking.pdf ├── easy_table.jpg ├── embedded-images.pdf ├── empty-document.pdf ├── example_table.jpg ├── ilpa-example-1.jpg ├── layout-parser-paper-fast.jpg ├── layout-parser-paper-fast.pdf ├── layout-parser-paper.pdf ├── loremipsum-flat.pdf ├── loremipsum.jpg ├── loremipsum.pdf ├── loremipsum.png ├── loremipsum.tiff ├── loremipsum_multipage.pdf ├── non-embedded.pdf ├── password.pdf ├── patent-1p.pdf ├── patent.pdf ├── pdf2image-memory-error-test-400p.pdf ├── recalibrating-risk-report.pdf ├── receipt-sample.jpg ├── table-multi-row-column-cells.png └── test-image.jpg ├── scripts ├── docker-build.sh ├── shellcheck.sh ├── test-unstructured-ingest-helper.sh └── version-sync.sh ├── setup.cfg ├── setup.py ├── test_unstructured_inference ├── conftest.py ├── inference │ ├── test_layout.py │ └── test_layout_element.py ├── models │ ├── test_detectron2onnx.py │ ├── test_eval.py │ ├── test_model.py │ ├── test_tables.py │ └── test_yolox.py ├── test_config.py ├── test_elements.py ├── test_logger.py ├── test_math.py ├── test_utils.py └── test_visualization.py └── unstructured_inference ├── __init__.py ├── __version__.py ├── config.py ├── constants.py ├── inference ├── __init__.py ├── elements.py ├── layout.py └── layoutelement.py ├── logger.py ├── math.py ├── models ├── __init__.py ├── base.py ├── detectron2onnx.py ├── eval.py ├── table_postprocess.py ├── tables.py ├── unstructuredmodel.py └── yolox.py ├── utils.py └── visualize.py /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/.github/workflows/claude.yml -------------------------------------------------------------------------------- /.github/workflows/create_issue.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/.github/workflows/create_issue.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements/base.in 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/README.md -------------------------------------------------------------------------------- /examples/ocr/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/examples/ocr/engine.py -------------------------------------------------------------------------------- /examples/ocr/output/.gitignore: -------------------------------------------------------------------------------- 1 | * -------------------------------------------------------------------------------- /examples/ocr/requirements.txt: -------------------------------------------------------------------------------- 1 | unstructured[local-inference] 2 | nltk -------------------------------------------------------------------------------- /examples/ocr/validate_ocr_performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/examples/ocr/validate_ocr_performance.py -------------------------------------------------------------------------------- /img/unstructured_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/img/unstructured_logo.png -------------------------------------------------------------------------------- /logger_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/logger_config.yaml -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/pytest.ini -------------------------------------------------------------------------------- /requirements/base.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/requirements/base.in -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/requirements/base.txt -------------------------------------------------------------------------------- /requirements/constraints.in: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/requirements/dev.in -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/requirements/dev.txt -------------------------------------------------------------------------------- /requirements/test.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/requirements/test.in -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/requirements/test.txt -------------------------------------------------------------------------------- /sample-docs/2023-Jan-economic-outlook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/2023-Jan-economic-outlook.pdf -------------------------------------------------------------------------------- /sample-docs/IRS-form-1987.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/IRS-form-1987.pdf -------------------------------------------------------------------------------- /sample-docs/RGBA_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/RGBA_image.png -------------------------------------------------------------------------------- /sample-docs/Silent-Giant.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/Silent-Giant.pdf -------------------------------------------------------------------------------- /sample-docs/design-thinking.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/design-thinking.pdf -------------------------------------------------------------------------------- /sample-docs/easy_table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/easy_table.jpg -------------------------------------------------------------------------------- /sample-docs/embedded-images.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/embedded-images.pdf -------------------------------------------------------------------------------- /sample-docs/empty-document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/empty-document.pdf -------------------------------------------------------------------------------- /sample-docs/example_table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/example_table.jpg -------------------------------------------------------------------------------- /sample-docs/ilpa-example-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/ilpa-example-1.jpg -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper-fast.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/layout-parser-paper-fast.jpg -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper-fast.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/layout-parser-paper-fast.pdf -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/layout-parser-paper.pdf -------------------------------------------------------------------------------- /sample-docs/loremipsum-flat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/loremipsum-flat.pdf -------------------------------------------------------------------------------- /sample-docs/loremipsum.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/loremipsum.jpg -------------------------------------------------------------------------------- /sample-docs/loremipsum.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/loremipsum.pdf -------------------------------------------------------------------------------- /sample-docs/loremipsum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/loremipsum.png -------------------------------------------------------------------------------- /sample-docs/loremipsum.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/loremipsum.tiff -------------------------------------------------------------------------------- /sample-docs/loremipsum_multipage.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/loremipsum_multipage.pdf -------------------------------------------------------------------------------- /sample-docs/non-embedded.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/non-embedded.pdf -------------------------------------------------------------------------------- /sample-docs/password.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/password.pdf -------------------------------------------------------------------------------- /sample-docs/patent-1p.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/patent-1p.pdf -------------------------------------------------------------------------------- /sample-docs/patent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/patent.pdf -------------------------------------------------------------------------------- /sample-docs/pdf2image-memory-error-test-400p.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/pdf2image-memory-error-test-400p.pdf -------------------------------------------------------------------------------- /sample-docs/recalibrating-risk-report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/recalibrating-risk-report.pdf -------------------------------------------------------------------------------- /sample-docs/receipt-sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/receipt-sample.jpg -------------------------------------------------------------------------------- /sample-docs/table-multi-row-column-cells.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/table-multi-row-column-cells.png -------------------------------------------------------------------------------- /sample-docs/test-image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/sample-docs/test-image.jpg -------------------------------------------------------------------------------- /scripts/docker-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/scripts/docker-build.sh -------------------------------------------------------------------------------- /scripts/shellcheck.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/scripts/shellcheck.sh -------------------------------------------------------------------------------- /scripts/test-unstructured-ingest-helper.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/scripts/test-unstructured-ingest-helper.sh -------------------------------------------------------------------------------- /scripts/version-sync.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/scripts/version-sync.sh -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/setup.py -------------------------------------------------------------------------------- /test_unstructured_inference/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/conftest.py -------------------------------------------------------------------------------- /test_unstructured_inference/inference/test_layout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/inference/test_layout.py -------------------------------------------------------------------------------- /test_unstructured_inference/inference/test_layout_element.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/inference/test_layout_element.py -------------------------------------------------------------------------------- /test_unstructured_inference/models/test_detectron2onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/models/test_detectron2onnx.py -------------------------------------------------------------------------------- /test_unstructured_inference/models/test_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/models/test_eval.py -------------------------------------------------------------------------------- /test_unstructured_inference/models/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/models/test_model.py -------------------------------------------------------------------------------- /test_unstructured_inference/models/test_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/models/test_tables.py -------------------------------------------------------------------------------- /test_unstructured_inference/models/test_yolox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/models/test_yolox.py -------------------------------------------------------------------------------- /test_unstructured_inference/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/test_config.py -------------------------------------------------------------------------------- /test_unstructured_inference/test_elements.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/test_elements.py -------------------------------------------------------------------------------- /test_unstructured_inference/test_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/test_logger.py -------------------------------------------------------------------------------- /test_unstructured_inference/test_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/test_math.py -------------------------------------------------------------------------------- /test_unstructured_inference/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/test_utils.py -------------------------------------------------------------------------------- /test_unstructured_inference/test_visualization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/test_unstructured_inference/test_visualization.py -------------------------------------------------------------------------------- /unstructured_inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_inference/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.1.2" # pragma: no cover 2 | -------------------------------------------------------------------------------- /unstructured_inference/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/config.py -------------------------------------------------------------------------------- /unstructured_inference/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/constants.py -------------------------------------------------------------------------------- /unstructured_inference/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_inference/inference/elements.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/inference/elements.py -------------------------------------------------------------------------------- /unstructured_inference/inference/layout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/inference/layout.py -------------------------------------------------------------------------------- /unstructured_inference/inference/layoutelement.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/inference/layoutelement.py -------------------------------------------------------------------------------- /unstructured_inference/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/logger.py -------------------------------------------------------------------------------- /unstructured_inference/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/math.py -------------------------------------------------------------------------------- /unstructured_inference/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_inference/models/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/models/base.py -------------------------------------------------------------------------------- /unstructured_inference/models/detectron2onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/models/detectron2onnx.py -------------------------------------------------------------------------------- /unstructured_inference/models/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/models/eval.py -------------------------------------------------------------------------------- /unstructured_inference/models/table_postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/models/table_postprocess.py -------------------------------------------------------------------------------- /unstructured_inference/models/tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/models/tables.py -------------------------------------------------------------------------------- /unstructured_inference/models/unstructuredmodel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/models/unstructuredmodel.py -------------------------------------------------------------------------------- /unstructured_inference/models/yolox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/models/yolox.py -------------------------------------------------------------------------------- /unstructured_inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/utils.py -------------------------------------------------------------------------------- /unstructured_inference/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-inference/HEAD/unstructured_inference/visualize.py --------------------------------------------------------------------------------