├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md ├── dependabot.yml └── workflows │ ├── bump_libraries.yaml │ ├── ci.yml │ ├── claude.yml │ └── docker-publish.yml ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE.md ├── Makefile ├── README.md ├── docker └── rockylinux-9.4 │ └── Dockerfile ├── exploration-notebooks ├── exploration-email.ipynb ├── exploration-html.ipynb └── exploration-powerpoint.ipynb ├── img ├── email-screenshot.png └── unstructured_logo.png ├── logger_config.yaml ├── prepline_general ├── __init__.py └── api │ ├── __init__.py │ ├── __version__.py │ ├── app.py │ ├── filetypes.py │ ├── general.py │ ├── models │ ├── __init__.py │ └── form_params.py │ ├── openapi.py │ └── utils.py ├── preprocessing-pipeline-family.yaml ├── pyproject.toml ├── requirements ├── base.in ├── base.txt ├── constraints.in ├── constraints.txt ├── test.in └── test.txt ├── sample-docs ├── .gitkeep ├── DA-1p-with-duplicate-pages.pdf ├── DA-1p.bmp ├── DA-1p.heic ├── README.md ├── README.rst ├── alert.eml ├── announcement.eml ├── embedded-images-tables.jpg ├── embedded-images-tables.pdf ├── english-and-korean.png ├── failing-encrypted.pdf ├── failing-invalid.pdf ├── failing-missing-pages.pdf ├── failing-missing-root.pdf ├── fake-doc.rtf ├── fake-email-attachment.eml ├── fake-email-image-embedded.eml ├── fake-email.eml ├── fake-email.msg ├── fake-html.html ├── fake-power-point.ppt ├── fake-power-point.pptx ├── fake-text-utf-32.txt ├── fake-text.txt ├── fake-xml.xml ├── fake.doc ├── fake.docx ├── fake.odt ├── family-day.eml ├── layout-parser-paper-fast.jpg ├── layout-parser-paper-fast.pdf ├── layout-parser-paper-fast.tiff ├── layout-parser-paper-with-table.jpg ├── layout-parser-paper.pdf ├── layout-parser-paper.pdf.gz ├── list-item-example.pdf ├── notes.ppt ├── notes.pptx ├── spring-weather.html.json ├── stanley-cups.csv ├── stanley-cups.tsv ├── stanley-cups.xlsx └── winter-sports.epub ├── scripts ├── app-start.sh ├── docker-build.sh ├── docker-smoke-test.sh ├── install-pandoc.sh ├── parallel-mode-test.sh ├── shellcheck.sh ├── smoketest.py ├── version-increment.sh └── version-sync.sh ├── setup.cfg └── test_general ├── __init__.py └── api ├── .gitkeep ├── __init__.py ├── test_app.py ├── test_deprecated_api.py ├── test_general.py ├── test_gzip.py └── test_utils.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/workflows/bump_libraries.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/.github/workflows/bump_libraries.yaml -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/.github/workflows/claude.yml -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/.github/workflows/docker-publish.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/.gitignore -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/LICENSE.md -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/README.md -------------------------------------------------------------------------------- /docker/rockylinux-9.4/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/docker/rockylinux-9.4/Dockerfile -------------------------------------------------------------------------------- /exploration-notebooks/exploration-email.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/exploration-notebooks/exploration-email.ipynb -------------------------------------------------------------------------------- /exploration-notebooks/exploration-html.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/exploration-notebooks/exploration-html.ipynb -------------------------------------------------------------------------------- /exploration-notebooks/exploration-powerpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/exploration-notebooks/exploration-powerpoint.ipynb -------------------------------------------------------------------------------- /img/email-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/img/email-screenshot.png -------------------------------------------------------------------------------- /img/unstructured_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/img/unstructured_logo.png -------------------------------------------------------------------------------- /logger_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/logger_config.yaml -------------------------------------------------------------------------------- /prepline_general/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prepline_general/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prepline_general/api/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.90" # pragma: no cover 2 | -------------------------------------------------------------------------------- /prepline_general/api/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/prepline_general/api/app.py -------------------------------------------------------------------------------- /prepline_general/api/filetypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/prepline_general/api/filetypes.py -------------------------------------------------------------------------------- /prepline_general/api/general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/prepline_general/api/general.py -------------------------------------------------------------------------------- /prepline_general/api/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prepline_general/api/models/form_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/prepline_general/api/models/form_params.py -------------------------------------------------------------------------------- /prepline_general/api/openapi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/prepline_general/api/openapi.py -------------------------------------------------------------------------------- /prepline_general/api/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/prepline_general/api/utils.py -------------------------------------------------------------------------------- /preprocessing-pipeline-family.yaml: -------------------------------------------------------------------------------- 1 | name: general 2 | version: 0.0.90 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements/base.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/requirements/base.in -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/requirements/base.txt -------------------------------------------------------------------------------- /requirements/constraints.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/requirements/constraints.in -------------------------------------------------------------------------------- /requirements/constraints.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/requirements/constraints.txt -------------------------------------------------------------------------------- /requirements/test.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/requirements/test.in -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/requirements/test.txt -------------------------------------------------------------------------------- /sample-docs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sample-docs/DA-1p-with-duplicate-pages.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/DA-1p-with-duplicate-pages.pdf -------------------------------------------------------------------------------- /sample-docs/DA-1p.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/DA-1p.bmp -------------------------------------------------------------------------------- /sample-docs/DA-1p.heic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/DA-1p.heic -------------------------------------------------------------------------------- /sample-docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/README.md -------------------------------------------------------------------------------- /sample-docs/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/README.rst -------------------------------------------------------------------------------- /sample-docs/alert.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/alert.eml -------------------------------------------------------------------------------- /sample-docs/announcement.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/announcement.eml -------------------------------------------------------------------------------- /sample-docs/embedded-images-tables.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/embedded-images-tables.jpg -------------------------------------------------------------------------------- /sample-docs/embedded-images-tables.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/embedded-images-tables.pdf -------------------------------------------------------------------------------- /sample-docs/english-and-korean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/english-and-korean.png -------------------------------------------------------------------------------- /sample-docs/failing-encrypted.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/failing-encrypted.pdf -------------------------------------------------------------------------------- /sample-docs/failing-invalid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/failing-invalid.pdf -------------------------------------------------------------------------------- /sample-docs/failing-missing-pages.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/failing-missing-pages.pdf -------------------------------------------------------------------------------- /sample-docs/failing-missing-root.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/failing-missing-root.pdf -------------------------------------------------------------------------------- /sample-docs/fake-doc.rtf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-doc.rtf -------------------------------------------------------------------------------- /sample-docs/fake-email-attachment.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-email-attachment.eml -------------------------------------------------------------------------------- /sample-docs/fake-email-image-embedded.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-email-image-embedded.eml -------------------------------------------------------------------------------- /sample-docs/fake-email.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-email.eml -------------------------------------------------------------------------------- /sample-docs/fake-email.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-email.msg -------------------------------------------------------------------------------- /sample-docs/fake-html.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-html.html -------------------------------------------------------------------------------- /sample-docs/fake-power-point.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-power-point.ppt -------------------------------------------------------------------------------- /sample-docs/fake-power-point.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-power-point.pptx -------------------------------------------------------------------------------- /sample-docs/fake-text-utf-32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-text-utf-32.txt -------------------------------------------------------------------------------- /sample-docs/fake-text.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-text.txt -------------------------------------------------------------------------------- /sample-docs/fake-xml.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake-xml.xml -------------------------------------------------------------------------------- /sample-docs/fake.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake.doc -------------------------------------------------------------------------------- /sample-docs/fake.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake.docx -------------------------------------------------------------------------------- /sample-docs/fake.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/fake.odt -------------------------------------------------------------------------------- /sample-docs/family-day.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/family-day.eml -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper-fast.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/layout-parser-paper-fast.jpg -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper-fast.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/layout-parser-paper-fast.pdf -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper-fast.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/layout-parser-paper-fast.tiff -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper-with-table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/layout-parser-paper-with-table.jpg -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/layout-parser-paper.pdf -------------------------------------------------------------------------------- /sample-docs/layout-parser-paper.pdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/layout-parser-paper.pdf.gz -------------------------------------------------------------------------------- /sample-docs/list-item-example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/list-item-example.pdf -------------------------------------------------------------------------------- /sample-docs/notes.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/notes.ppt -------------------------------------------------------------------------------- /sample-docs/notes.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/notes.pptx -------------------------------------------------------------------------------- /sample-docs/spring-weather.html.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/spring-weather.html.json -------------------------------------------------------------------------------- /sample-docs/stanley-cups.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/stanley-cups.csv -------------------------------------------------------------------------------- /sample-docs/stanley-cups.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/stanley-cups.tsv -------------------------------------------------------------------------------- /sample-docs/stanley-cups.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/stanley-cups.xlsx -------------------------------------------------------------------------------- /sample-docs/winter-sports.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/sample-docs/winter-sports.epub -------------------------------------------------------------------------------- /scripts/app-start.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/app-start.sh -------------------------------------------------------------------------------- /scripts/docker-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/docker-build.sh -------------------------------------------------------------------------------- /scripts/docker-smoke-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/docker-smoke-test.sh -------------------------------------------------------------------------------- /scripts/install-pandoc.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/install-pandoc.sh -------------------------------------------------------------------------------- /scripts/parallel-mode-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/parallel-mode-test.sh -------------------------------------------------------------------------------- /scripts/shellcheck.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/shellcheck.sh -------------------------------------------------------------------------------- /scripts/smoketest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/smoketest.py -------------------------------------------------------------------------------- /scripts/version-increment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/version-increment.sh -------------------------------------------------------------------------------- /scripts/version-sync.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/scripts/version-sync.sh -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | exclude = 4 | prepline_*/api 5 | -------------------------------------------------------------------------------- /test_general/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_general/api/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_general/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_general/api/test_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/test_general/api/test_app.py -------------------------------------------------------------------------------- /test_general/api/test_deprecated_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/test_general/api/test_deprecated_api.py -------------------------------------------------------------------------------- /test_general/api/test_general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/test_general/api/test_general.py -------------------------------------------------------------------------------- /test_general/api/test_gzip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/test_general/api/test_gzip.py -------------------------------------------------------------------------------- /test_general/api/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-api/HEAD/test_general/api/test_utils.py --------------------------------------------------------------------------------