├── .github ├── actions │ ├── generate-cache-key │ │ └── action.yml │ ├── setup-environment │ │ └── action.yml │ └── setup-python │ │ └── action.yml └── workflows │ ├── claude.yml │ ├── codeflash.yml │ ├── e2e.yml │ ├── ingest-test-fixtures-update-pr.yml │ ├── release.yml │ └── unit_tests.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE.md ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── README.md ├── assets │ ├── pipeline.excalidraw │ ├── pipeline.png │ ├── sequence.png │ ├── sequence.txt │ └── unstructured_ingest_cli_pipeline_diagram.png └── connector_development.md ├── example-docs ├── 2023-half-year-analyses-by-segment.xlsx ├── CantinaBand3.wav ├── README.md ├── README.org ├── README.rst ├── book-war-and-peace-1225p.txt ├── book-war-and-peace-1p.txt ├── category-level.docx ├── contains-pictures.docx ├── csv-with-escaped-commas.csv ├── csv-with-long-lines.csv ├── docx-hdrftr.docx ├── docx-shapes.docx ├── docx-tables.docx ├── duplicate-paragraphs.doc ├── duplicate-paragraphs.docx ├── eml │ ├── email-equals-attachment-filename.eml │ ├── email-inline-content-disposition.eml │ ├── email-no-html-content-1.eml │ ├── email-no-utf8-2008-07-16.062410.eml │ ├── email-no-utf8-2014-03-17.111517.eml │ ├── email-replace-mime-encodings-error-1.eml │ ├── email-replace-mime-encodings-error-2.eml │ ├── email-replace-mime-encodings-error-3.eml │ ├── email-replace-mime-encodings-error-4.eml │ ├── email-replace-mime-encodings-error-5.eml │ ├── email-with-image.eml │ ├── fake-email-attachment.eml │ ├── fake-email-b64.eml │ ├── fake-email-header.eml │ ├── fake-email-image-embedded.eml │ ├── fake-email-malformed-encoding.eml │ ├── fake-email-utf-16-be.eml │ ├── fake-email-utf-16-le.eml │ ├── fake-email-utf-16.eml │ ├── fake-email.eml │ ├── fake-email.txt │ ├── fake-encrypted.eml │ └── signed-doc.p7s ├── emoji.xlsx ├── empty.txt ├── example-10k-1p.html ├── example-10k-230p.html ├── example-10k-utf-16.html ├── example-10k.html ├── example-list-items-multiple.docx ├── example-steelJIS-datasheet-utf-16.html ├── example-steelJIS-datasheet.html ├── example-with-scripts.html ├── factbook-utf-16.xml ├── factbook.xml ├── fake-doc-emphasized-text.doc ├── fake-doc-emphasized-text.docx ├── fake-doc.rtf ├── fake-email-attachment.msg ├── fake-email-multiple-attachments.msg ├── fake-email.eml ├── fake-email.msg ├── fake-email.txt ├── fake-encrypted.msg ├── fake-html-cp1252.html ├── fake-html-lang-de.html ├── fake-html-pre.htm ├── fake-html-with-duplicate-elements.html ├── fake-html-with-footer-and-header.html ├── fake-html.html ├── fake-incomplete-json.txt ├── fake-power-point-malformed.pptx ├── fake-power-point-many-pages.pptx ├── fake-power-point-table.pptx ├── fake-power-point.ppt ├── fake-power-point.pptx ├── fake-text-utf-16-be.txt ├── fake-text-utf-16-le.txt ├── fake-text-utf-16.txt ├── fake-text-utf-32.txt ├── fake-text.txt ├── fake.doc ├── fake.docx ├── fake.go ├── fake.odt ├── fake_table.docx ├── group-shapes-nested.pptx ├── handbook-1p-no-rendered-page-breaks.docx ├── handbook-1p.docx ├── handbook-872p.docx ├── hebrew-text-base64-iso88598i.txt ├── hlink-meta.docx ├── ideas-page.html ├── img │ ├── DA-1p.heic │ ├── DA-1p.jpg │ ├── DA-1p.png │ ├── bmp_24.bmp │ ├── chi_sim_image.jpeg │ ├── double-column-A.jpg │ ├── double-column-B.jpg │ ├── embedded-images-tables.jpg │ ├── english-and-korean.png │ ├── example.jpg │ ├── jpn-vert.jpeg │ ├── layout-parser-paper-10p.jpg │ ├── layout-parser-paper-combined.tiff │ ├── layout-parser-paper-fast.jpg │ ├── layout-parser-paper-fast.tiff │ ├── layout-parser-paper-with-table.jpg │ └── table-multi-row-column-cells.png ├── language-docs │ ├── UDHR_first_article_all.txt │ ├── eng_afr_spa.txt │ ├── eng_spa.txt │ ├── eng_spa.xlsx │ ├── eng_spa_mult.doc │ ├── eng_spa_mult.docx │ ├── eng_spa_mult.eml │ ├── eng_spa_mult.epub │ ├── eng_spa_mult.html │ ├── eng_spa_mult.md │ ├── eng_spa_mult.odt │ ├── eng_spa_mult.org │ ├── eng_spa_mult.ppt │ ├── eng_spa_mult.pptx │ ├── eng_spa_mult.rst │ ├── eng_spa_mult.rtf │ ├── eng_spa_mult.txt │ └── eng_spa_mult.xml ├── logger.py ├── more-than-1k-cells.xlsx ├── norwich-city.txt ├── page-breaks.docx ├── pdf │ ├── DA-1p.pdf │ ├── DA-619p.pdf │ ├── a1977-backus-p21.pdf │ ├── all-number-table.pdf │ ├── chevron-page.pdf │ ├── copy-protected.pdf │ ├── embedded-images-tables.pdf │ ├── embedded-images.pdf │ ├── embedded-link.pdf │ ├── emphasis-text.pdf │ ├── failure-after-repair.pdf │ ├── fake-memo-with-duplicate-page.pdf │ ├── fake-memo.pdf │ ├── header-test-doc.pdf │ ├── interface-config-guide-p93.pdf │ ├── invalid-pdf-structure-pdfminer-entire-doc.pdf │ ├── invalid-pdf-structure-pdfminer-one-page.pdf │ ├── korean-text-with-tables.pdf │ ├── layout-parser-paper-fast.pdf │ ├── layout-parser-paper-with-empty-pages.pdf │ ├── layout-parser-paper-with-table.pdf │ ├── layout-parser-paper.pdf │ ├── list-item-example.pdf │ ├── loremipsum-flat.pdf │ ├── multi-column-2p.pdf │ ├── multi-column.pdf │ ├── negative-coords.pdf │ ├── pdf-bad-color-space.pdf │ ├── pdf2image-memory-error-test-400p.pdf │ ├── reliance.pdf │ └── table-multi-row-column-cells.pdf ├── picture.pptx ├── sample-presentation.pptx ├── science-exploration-1p.pptx ├── science-exploration-369p.pptx ├── simple-table.md ├── simple.doc ├── simple.docx ├── simple.json ├── simple.odt ├── simple.yaml ├── simple.zip ├── spring-weather.html.json ├── stanley-cups-with-emoji.csv ├── stanley-cups-with-emoji.tsv ├── stanley-cups.csv ├── stanley-cups.tsv ├── stanley-cups.xlsx ├── table-multi-row-column-cells-actual.csv ├── table-semicolon-delimiter.csv ├── tables-with-incomplete-rows.docx ├── teams_chat.docx ├── test_evaluate_files │ ├── filter_list.txt │ ├── gold_standard_cct │ │ ├── Bank Good Credit Loan.pptx.txt │ │ ├── Performance-Audit-Discussion.pdf.txt │ │ └── currency.csv.txt │ ├── gold_standard_element_type │ │ └── IRS-form-1987.pdf.json │ ├── gold_standard_table_structure │ │ └── IRS-2023-Form-1095-A.pdf.json │ ├── unstructured_output │ │ ├── Bank Good Credit Loan.pptx.json │ │ ├── IRS-form-1987.pdf.json │ │ ├── Performance-Audit-Discussion.pdf.json │ │ ├── currency.csv.json │ │ └── form.json │ ├── unstructured_output_cct │ │ ├── Bank Good Credit Loan.pptx.txt │ │ ├── IRS-form-1987.pdf.txt │ │ ├── Performance-Audit-Discussion.pdf.txt │ │ └── currency.csv.txt │ └── unstructured_output_table_structure │ │ └── IRS-2023-Form-1095-A.pdf.json ├── tests-example.xls ├── unsupported │ └── factbook.xsl ├── vodafone.xlsx ├── winter-sports.epub └── xlsx-subtable-cases.xlsx ├── examples ├── __init__.py ├── airtable.py ├── azure_cognitive_search.py ├── chroma.py ├── couchbase.py ├── databricks_volumes_dest.py ├── databricks_volumes_source.py ├── delta_table.py ├── discord_example.py ├── elasticsearch.py ├── google_drive.py ├── kdbai.py ├── local.py ├── milvus.py ├── mongodb.py ├── notebooks │ └── databricks_delta_tables.ipynb ├── opensearch.py ├── pinecone.py ├── s3.py ├── salesforce.py ├── sharepoint.py ├── singlestore.py ├── sql.py ├── vectara.py └── weaviate.py ├── pyproject.toml ├── requirements ├── base.txt ├── connectors │ ├── airtable.txt │ ├── astradb.txt │ ├── azure-ai-search.txt │ ├── azure.txt │ ├── biomed.txt │ ├── box.txt │ ├── chroma.txt │ ├── clarifai.txt │ ├── confluence.txt │ ├── couchbase.txt │ ├── databricks-delta-tables.txt │ ├── databricks-volumes.txt │ ├── delta-table.txt │ ├── discord.txt │ ├── dropbox.txt │ ├── duckdb.txt │ ├── elasticsearch.txt │ ├── gcs.txt │ ├── github.txt │ ├── gitlab.txt │ ├── google-drive.txt │ ├── hubspot.txt │ ├── ibm-watsonx-s3.txt │ ├── jira.txt │ ├── kafka.txt │ ├── kdbai.txt │ ├── lancedb.txt │ ├── milvus.txt │ ├── mongodb.txt │ ├── neo4j.txt │ ├── notion.txt │ ├── onedrive.txt │ ├── opensearch.txt │ ├── outlook.txt │ ├── pinecone.txt │ ├── postgres.txt │ ├── qdrant.txt │ ├── reddit.txt │ ├── redis.txt │ ├── s3.txt │ ├── salesforce.txt │ ├── sftp.txt │ ├── sharepoint.txt │ ├── singlestore.txt │ ├── slack.txt │ ├── snowflake.txt │ ├── vastdb.txt │ ├── vectara.txt │ ├── weaviate.txt │ ├── wikipedia.txt │ └── zendesk.txt ├── embed │ ├── bedrock.txt │ ├── huggingface.txt │ ├── mixedbreadai.txt │ ├── octoai.txt │ ├── openai.txt │ ├── togetherai.txt │ ├── vertexai.txt │ └── voyageai.txt ├── local_partition │ ├── doc.txt │ ├── docx.txt │ ├── epub.txt │ ├── image.txt │ ├── md.txt │ ├── msg.txt │ ├── odt.txt │ ├── org.txt │ ├── pdf.txt │ ├── ppt.txt │ ├── pptx.txt │ ├── rst.txt │ ├── rtf.txt │ ├── tsv.txt │ └── xlsx.txt └── remote │ └── client.txt ├── scripts ├── check_untagged_tests.sh ├── install-docker-compose.sh ├── parse_pytest_report.py ├── pip-compile.sh └── version-sync.sh ├── test ├── __init__.py ├── integration │ ├── __init__.py │ ├── chunkers │ │ ├── __init__.py │ │ ├── assets │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ ├── README.md.json │ │ │ ├── README.rst.json │ │ │ ├── alert.eml.json │ │ │ ├── announcement.eml.json │ │ │ ├── embedded-images-tables.jpg.json │ │ │ ├── embedded-images-tables.pdf.json │ │ │ ├── english-and-korean.png.json │ │ │ ├── fake-doc.rtf.json │ │ │ ├── fake-email-attachment.eml.json │ │ │ ├── fake-email-image-embedded.eml.json │ │ │ ├── fake-email.eml.json │ │ │ ├── fake-email.msg.json │ │ │ ├── fake-html.html.json │ │ │ ├── fake-power-point.ppt.json │ │ │ ├── fake-power-point.pptx.json │ │ │ ├── fake-text-utf-32.txt.json │ │ │ ├── fake-text.txt.json │ │ │ ├── fake-xml.xml.json │ │ │ ├── fake.doc.json │ │ │ ├── fake.docx.json │ │ │ ├── fake.odt.json │ │ │ ├── family-day.eml.json │ │ │ ├── layout-parser-paper-fast.jpg.json │ │ │ ├── layout-parser-paper-fast.pdf.json │ │ │ ├── layout-parser-paper-with-table.jpg.json │ │ │ ├── layout-parser-paper.pdf.gz.json │ │ │ ├── layout-parser-paper.pdf.json │ │ │ ├── list-item-example.pdf.json │ │ │ ├── multi_page_image.tif.json │ │ │ ├── spring-weather.html.json.json │ │ │ ├── stanley-cups.csv.json │ │ │ ├── stanley-cups.tsv.json │ │ │ ├── stanley-cups.xlsx.json │ │ │ └── winter-sports.epub.json │ │ └── test_chunkers.py │ ├── connectors │ │ ├── __init__.py │ │ ├── assets │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ ├── conftest.py │ │ ├── databricks │ │ │ ├── __init__.py │ │ │ └── test_volumes_native.py │ │ ├── discord │ │ │ ├── __init__.py │ │ │ └── test_discord.py │ │ ├── duckdb │ │ │ ├── __init__.py │ │ │ ├── assets │ │ │ │ └── duckdb-schema.sql │ │ │ ├── conftest.py │ │ │ ├── test_duckdb.py │ │ │ └── test_motherduck.py │ │ ├── elasticsearch │ │ │ ├── __init__.py │ │ │ ├── assets │ │ │ │ ├── elasticsearch_elements_mappings.json │ │ │ │ ├── opensearch_elements_mappings.json │ │ │ │ └── wiki_movie_plots_small.csv │ │ │ ├── conftest.py │ │ │ ├── test_elasticsearch.py │ │ │ └── test_opensearch.py │ │ ├── env_setup │ │ │ ├── kafka │ │ │ │ └── docker-compose.yaml │ │ │ ├── milvus │ │ │ │ └── docker-compose.yml │ │ │ ├── minio │ │ │ │ └── source │ │ │ │ │ ├── .env │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── docker-compose.yaml │ │ │ │ │ ├── seed.sh │ │ │ │ │ └── wiki_movie_plots_small.csv │ │ │ └── sql │ │ │ │ ├── databricks_delta_tables │ │ │ │ └── destination │ │ │ │ │ └── schema.sql │ │ │ │ ├── postgres │ │ │ │ ├── destination │ │ │ │ │ ├── docker-compose.yaml │ │ │ │ │ └── pgvector-schema.sql │ │ │ │ └── source │ │ │ │ │ ├── docker-compose.yaml │ │ │ │ │ └── postgres-schema.sql │ │ │ │ ├── singlestore │ │ │ │ ├── destination │ │ │ │ │ ├── docker-compose.yaml │ │ │ │ │ └── schema.sql │ │ │ │ └── source │ │ │ │ │ ├── docker-compose.yaml │ │ │ │ │ └── schema.sql │ │ │ │ ├── snowflake │ │ │ │ ├── destination │ │ │ │ │ └── snowflake-schema.sql │ │ │ │ └── source │ │ │ │ │ └── snowflake-schema.sql │ │ │ │ └── sqlite │ │ │ │ ├── destination │ │ │ │ └── sqlite-schema.sql │ │ │ │ └── source │ │ │ │ └── sqlite-schema.sql │ │ ├── expected_results │ │ │ ├── .gitignore │ │ │ ├── astradb │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── 25b75f1d-a2ea-4c97-b75f-1da2eadc97f7.csv │ │ │ │ │ ├── 60297eea-73d7-4fca-a97e-ea73d7cfca62.csv │ │ │ │ │ ├── 641d99e3-9941-4c18-9d99-e399414c183d.csv │ │ │ │ │ ├── 762c0093-2277-4f3e-ac00-932277af3e0e.csv │ │ │ │ │ └── ae40df94-0b3a-4f89-80df-940b3a6f8966.csv │ │ │ │ ├── file_data │ │ │ │ │ ├── 25b75f1d-a2ea-4c97-b75f-1da2eadc97f7.csv.json │ │ │ │ │ ├── 43d02113-723f-5ec1-acbb-c8da8d3650dc.json │ │ │ │ │ ├── 60297eea-73d7-4fca-a97e-ea73d7cfca62.csv.json │ │ │ │ │ ├── 641d99e3-9941-4c18-9d99-e399414c183d.csv.json │ │ │ │ │ ├── 762c0093-2277-4f3e-ac00-932277af3e0e.csv.json │ │ │ │ │ └── ae40df94-0b3a-4f89-80df-940b3a6f8966.csv.json │ │ │ │ ├── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ │ └── stager_flatten_metadata │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── azure_ai_search │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── chroma │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── confluence_small │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ └── ~712020ee2049b23ac64da1b06405e83e96830f │ │ │ │ │ │ ├── 196720.html │ │ │ │ │ │ ├── 196722.html │ │ │ │ │ │ ├── 196752.html │ │ │ │ │ │ ├── 196760.html │ │ │ │ │ │ ├── 196766.html │ │ │ │ │ │ └── 425986.html │ │ │ │ └── file_data │ │ │ │ │ ├── 196720.json │ │ │ │ │ ├── 196722.json │ │ │ │ │ ├── 196752.json │ │ │ │ │ ├── 196760.json │ │ │ │ │ ├── 196766.json │ │ │ │ │ └── 425986.json │ │ │ ├── databricks_volumes_native │ │ │ │ ├── directory_structure.json │ │ │ │ └── file_data │ │ │ │ │ └── 9a6eb650-98d6-5465-8f1d-aa7118eee87e.json │ │ │ ├── databricks_volumes_native_pat │ │ │ │ ├── directory_structure.json │ │ │ │ └── file_data │ │ │ │ │ └── 9a6eb650-98d6-5465-8f1d-aa7118eee87e.json │ │ │ ├── directory_structure.json │ │ │ ├── discord │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── 1099442333440802930.txt │ │ │ │ │ └── 1099601456321003600.txt │ │ │ │ └── file_data │ │ │ │ │ ├── 1099442333440802930.json │ │ │ │ │ └── 1099601456321003600.json │ │ │ ├── downloads │ │ │ │ ├── 25b75f1d-a2ea-4c97-b75f-1da2eadc97f7.csv │ │ │ │ ├── 60297eea-73d7-4fca-a97e-ea73d7cfca62.csv │ │ │ │ ├── 641d99e3-9941-4c18-9d99-e399414c183d.csv │ │ │ │ ├── 762c0093-2277-4f3e-ac00-932277af3e0e.csv │ │ │ │ └── ae40df94-0b3a-4f89-80df-940b3a6f8966.csv │ │ │ ├── dropbox │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── handbook-1p.docx │ │ │ │ │ ├── nested-1 │ │ │ │ │ │ └── ideas-page.html │ │ │ │ │ ├── nested-2 │ │ │ │ │ │ └── ideas-page.html │ │ │ │ │ └── science-exploration-1p.pptx │ │ │ │ └── file_data │ │ │ │ │ ├── 00a08c10-1e52-5ade-8561-0819e1bdec38.json │ │ │ │ │ ├── 364c6697-4ed5-5976-88bf-c9c4dac07283.json │ │ │ │ │ ├── 8139a859-73b6-5442-866e-b5de0d81b6e6.json │ │ │ │ │ └── 980b8fb3-5b3d-52e3-875f-147a162e16c3.json │ │ │ ├── duckdb │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── elasticsearch │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── movies-0.txt │ │ │ │ │ ├── movies-1.txt │ │ │ │ │ ├── movies-2.txt │ │ │ │ │ ├── movies-3.txt │ │ │ │ │ ├── movies-4.txt │ │ │ │ │ ├── movies-5.txt │ │ │ │ │ ├── movies-6.txt │ │ │ │ │ ├── movies-7.txt │ │ │ │ │ ├── movies-8.txt │ │ │ │ │ └── movies-9.txt │ │ │ │ ├── file_data │ │ │ │ │ ├── 73bd091b-f16a-5d30-b5bf-de62c85ccbe1.json │ │ │ │ │ ├── movies-0.json │ │ │ │ │ ├── movies-1.json │ │ │ │ │ ├── movies-2.json │ │ │ │ │ ├── movies-3.json │ │ │ │ │ ├── movies-4.json │ │ │ │ │ ├── movies-5.json │ │ │ │ │ ├── movies-6.json │ │ │ │ │ ├── movies-7.json │ │ │ │ │ ├── movies-8.json │ │ │ │ │ └── movies-9.json │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── file_data │ │ │ │ ├── 25b75f1d-a2ea-4c97-b75f-1da2eadc97f7.csv.json │ │ │ │ ├── 43d02113-723f-5ec1-acbb-c8da8d3650dc.json │ │ │ │ ├── 60297eea-73d7-4fca-a97e-ea73d7cfca62.csv.json │ │ │ │ ├── 641d99e3-9941-4c18-9d99-e399414c183d.csv.json │ │ │ │ ├── 762c0093-2277-4f3e-ac00-932277af3e0e.csv.json │ │ │ │ └── ae40df94-0b3a-4f89-80df-940b3a6f8966.csv.json │ │ │ ├── github │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── LICENSE.txt │ │ │ │ │ └── test.html │ │ │ │ └── file_data │ │ │ │ │ ├── 7311a514-9924-571d-827e-db955b96320a.json │ │ │ │ │ └── 7630c7f7-4d81-5ec0-b041-c94abc25e1ee.json │ │ │ ├── google_drive │ │ │ │ ├── directory_structure.json │ │ │ │ └── file_data │ │ │ │ │ ├── 117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8.json │ │ │ │ │ ├── 1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o.json │ │ │ │ │ ├── 1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV.json │ │ │ │ │ └── 1yXGjX5j0MhKb01vfGRjNJqXyrHBnHxVo.json │ │ │ ├── google_drive_e2e │ │ │ │ ├── directory_structure.json │ │ │ │ └── file_data │ │ │ │ │ ├── 117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8.json │ │ │ │ │ ├── 1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o.json │ │ │ │ │ ├── 1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV.json │ │ │ │ │ └── 1yXGjX5j0MhKb01vfGRjNJqXyrHBnHxVo.json │ │ │ ├── google_drive_source │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ └── ingest_integration_test_do_not_change │ │ │ │ └── file_data │ │ │ │ │ └── 1r-RDeDtKprFQWST4PCIPV618y_sBL7N7EEWg7q4kZrU.json │ │ │ ├── jira │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── JCTP1 │ │ │ │ │ │ ├── JCTP1-1.txt │ │ │ │ │ │ ├── JCTP1-2.txt │ │ │ │ │ │ ├── JCTP1-3.txt │ │ │ │ │ │ ├── JCTP1-4.txt │ │ │ │ │ │ └── attachments │ │ │ │ │ │ │ └── 10000.png │ │ │ │ │ ├── JCTP2 │ │ │ │ │ │ ├── JCTP2-1.txt │ │ │ │ │ │ ├── JCTP2-2.txt │ │ │ │ │ │ └── JCTP2-3.txt │ │ │ │ │ └── JCTP3 │ │ │ │ │ │ └── JCTP3-1.txt │ │ │ │ └── file_data │ │ │ │ │ ├── 10000.json │ │ │ │ │ ├── 10000a.json │ │ │ │ │ ├── 10001.json │ │ │ │ │ ├── 10002.json │ │ │ │ │ ├── 10003.json │ │ │ │ │ ├── 10004.json │ │ │ │ │ ├── 10005.json │ │ │ │ │ ├── 10013.json │ │ │ │ │ └── 10014.json │ │ │ ├── kafka-cloud │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── fake-topic_0_0.txt │ │ │ │ │ ├── fake-topic_0_1.txt │ │ │ │ │ ├── fake-topic_0_2.txt │ │ │ │ │ ├── fake-topic_0_3.txt │ │ │ │ │ └── fake-topic_0_4.txt │ │ │ │ └── file_data │ │ │ │ │ ├── fake-topic_0_0.json │ │ │ │ │ ├── fake-topic_0_1.json │ │ │ │ │ ├── fake-topic_0_2.json │ │ │ │ │ ├── fake-topic_0_3.json │ │ │ │ │ └── fake-topic_0_4.json │ │ │ ├── kafka-local │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── fake-topic_0_0.txt │ │ │ │ │ ├── fake-topic_0_1.txt │ │ │ │ │ ├── fake-topic_0_2.txt │ │ │ │ │ ├── fake-topic_0_3.txt │ │ │ │ │ └── fake-topic_0_4.txt │ │ │ │ └── file_data │ │ │ │ │ ├── fake-topic_0_0.json │ │ │ │ │ ├── fake-topic_0_1.json │ │ │ │ │ ├── fake-topic_0_2.json │ │ │ │ │ ├── fake-topic_0_3.json │ │ │ │ │ └── fake-topic_0_4.json │ │ │ ├── lancedb │ │ │ │ └── stager │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.feather │ │ │ ├── milvus │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── mongodb │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── 659daefa21dd8c9054b084b6.txt │ │ │ │ │ ├── 659daefa21dd8c9054b084b7.txt │ │ │ │ │ ├── 659daefa21dd8c9054b084b8.txt │ │ │ │ │ └── 659daefa21dd8c9054b084b9.txt │ │ │ │ └── file_data │ │ │ │ │ ├── 172fdbf9-275b-5e38-82e6-4ec730d42460.json │ │ │ │ │ ├── 659daefa21dd8c9054b084b6.json │ │ │ │ │ ├── 659daefa21dd8c9054b084b7.json │ │ │ │ │ ├── 659daefa21dd8c9054b084b8.json │ │ │ │ │ └── 659daefa21dd8c9054b084b9.json │ │ │ ├── notion_database │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ └── 1722c3765a0a8082b382ebc2c62d3f4c.html │ │ │ │ └── file_data │ │ │ │ │ └── 1722c3765a0a8082b382ebc2c62d3f4c.json │ │ │ ├── notion_page │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ └── 1572c3765a0a806299f0dd6999f9e4c7.html │ │ │ │ └── file_data │ │ │ │ │ └── 1572c3765a0a806299f0dd6999f9e4c7.json │ │ │ ├── onedrive │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ └── fake-email.txt │ │ │ │ └── file_data │ │ │ │ │ └── 01FKMPQP5P3IF7RTMOW5EZPDJGEBVUREKR.json │ │ │ ├── opensearch │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── movies-0.txt │ │ │ │ │ ├── movies-1.txt │ │ │ │ │ ├── movies-2.txt │ │ │ │ │ ├── movies-3.txt │ │ │ │ │ ├── movies-4.txt │ │ │ │ │ ├── movies-5.txt │ │ │ │ │ ├── movies-6.txt │ │ │ │ │ ├── movies-7.txt │ │ │ │ │ ├── movies-8.txt │ │ │ │ │ └── movies-9.txt │ │ │ │ ├── file_data │ │ │ │ │ ├── 73bd091b-f16a-5d30-b5bf-de62c85ccbe1.json │ │ │ │ │ ├── movies-0.json │ │ │ │ │ ├── movies-1.json │ │ │ │ │ ├── movies-2.json │ │ │ │ │ ├── movies-3.json │ │ │ │ │ ├── movies-4.json │ │ │ │ │ ├── movies-5.json │ │ │ │ │ ├── movies-6.json │ │ │ │ │ ├── movies-7.json │ │ │ │ │ ├── movies-8.json │ │ │ │ │ └── movies-9.json │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── pinecone │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── postgres │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── cars-1-5fb93ce5.csv │ │ │ │ │ ├── cars-10-5fb93ce5.csv │ │ │ │ │ ├── cars-2-5fb93ce5.csv │ │ │ │ │ ├── cars-3-5fb93ce5.csv │ │ │ │ │ ├── cars-4-5fb93ce5.csv │ │ │ │ │ ├── cars-5-5fb93ce5.csv │ │ │ │ │ ├── cars-6-5fb93ce5.csv │ │ │ │ │ ├── cars-7-5fb93ce5.csv │ │ │ │ │ ├── cars-8-5fb93ce5.csv │ │ │ │ │ └── cars-9-5fb93ce5.csv │ │ │ │ ├── file_data │ │ │ │ │ ├── 1c704234-e0da-5676-be32-a12cc6407b3d.json │ │ │ │ │ ├── 324354db-0465-56ca-a023-072481f5dd03.json │ │ │ │ │ ├── cars-1-5fb93ce5.json │ │ │ │ │ ├── cars-10-5fb93ce5.json │ │ │ │ │ ├── cars-2-5fb93ce5.json │ │ │ │ │ ├── cars-3-5fb93ce5.json │ │ │ │ │ ├── cars-4-5fb93ce5.json │ │ │ │ │ ├── cars-5-5fb93ce5.json │ │ │ │ │ ├── cars-6-5fb93ce5.json │ │ │ │ │ ├── cars-7-5fb93ce5.json │ │ │ │ │ ├── cars-8-5fb93ce5.json │ │ │ │ │ └── cars-9-5fb93ce5.json │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── qdrant-local │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── s3-minio │ │ │ │ ├── expected_s3_keys.json │ │ │ │ └── file_data │ │ │ │ │ └── 61ec39a6-5088-5edd-abd2-6c36aa996068.json │ │ │ ├── s3-specialchar │ │ │ │ ├── expected_s3_keys.json │ │ │ │ └── file_data │ │ │ │ │ ├── 869bf15f-e840-51dc-a818-8d0b817817c9.json │ │ │ │ │ └── e2cb44ab-2f1a-5037-901d-a284371047bb.json │ │ │ ├── s3 │ │ │ │ ├── expected_s3_keys.json │ │ │ │ └── file_data │ │ │ │ │ ├── 4b30fb79-f071-557e-9357-ca8af7295ab2.json │ │ │ │ │ ├── 6375773c-0da2-5b4a-a337-ef63f6f96d94.json │ │ │ │ │ ├── bc4d433b-6e7b-5512-bb38-a2048b338e68.json │ │ │ │ │ └── ed83cc0e-d390-5018-bb74-bc4e2e46a206.json │ │ │ ├── sharepoint │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ └── fake-memo.pdf │ │ │ │ └── file_data │ │ │ │ │ └── 01QKP26QZL5KBVQTQ3IRDYF72MRH2QKKR3.json │ │ │ ├── sharepoint1 │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── Folder1 │ │ │ │ │ │ ├── Folder2 │ │ │ │ │ │ │ └── fake-email.txt │ │ │ │ │ │ └── fake-memo.pdf │ │ │ │ │ ├── book-war-and-peace-1p.txt │ │ │ │ │ └── list-item-example.pdf │ │ │ │ └── file_data │ │ │ │ │ ├── 0153RHRSABDC6JJIUHKJF2C5ZFGJF3C6XJ.json │ │ │ │ │ ├── 0153RHRSAVBNSXPKVIBZC3ZD53KENINVGG.json │ │ │ │ │ ├── 0153RHRSEXY3G5U3B5GBFY7TZKAY236XAL.json │ │ │ │ │ └── 0153RHRSFFVW43WTVG5ZFY5WL7ZH3GUOGT.json │ │ │ ├── sharepoint2 │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── Folder2 │ │ │ │ │ │ └── fake-email.txt │ │ │ │ │ └── fake-memo.pdf │ │ │ │ └── file_data │ │ │ │ │ ├── 0153RHRSEXY3G5U3B5GBFY7TZKAY236XAL.json │ │ │ │ │ └── 0153RHRSFFVW43WTVG5ZFY5WL7ZH3GUOGT.json │ │ │ ├── sharepoint3 │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── book-war-and-peace-1225p.txt │ │ │ │ │ └── fake-memo.pdf │ │ │ │ └── file_data │ │ │ │ │ ├── 01QKP26Q7PST2HZ7PAXFBYQ5G2QF2NLSPN.json │ │ │ │ │ └── 01QKP26QZL5KBVQTQ3IRDYF72MRH2QKKR3.json │ │ │ ├── sharepoint4 │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── Folder1 │ │ │ │ │ │ ├── Folder2 │ │ │ │ │ │ │ └── fake-email.txt │ │ │ │ │ │ └── fake-memo.pdf │ │ │ │ │ ├── book-war-and-peace-1p.txt │ │ │ │ │ └── list-item-example.pdf │ │ │ │ └── file_data │ │ │ │ │ ├── 0153RHRSABDC6JJIUHKJF2C5ZFGJF3C6XJ.json │ │ │ │ │ ├── 0153RHRSAVBNSXPKVIBZC3ZD53KENINVGG.json │ │ │ │ │ ├── 0153RHRSEXY3G5U3B5GBFY7TZKAY236XAL.json │ │ │ │ │ └── 0153RHRSFFVW43WTVG5ZFY5WL7ZH3GUOGT.json │ │ │ ├── sharepoint5 │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── e2e-library-folder │ │ │ │ │ │ └── book-war-and-peace-1p.txt │ │ │ │ │ ├── fake-doc.rtf │ │ │ │ │ └── fake-html.html │ │ │ │ └── file_data │ │ │ │ │ ├── 0153RHRSBCDXFGQVNK6NCZOQZY3CZ2CCI2.json │ │ │ │ │ ├── 0153RHRSD4LQFV3VWQRZCY3E724WTG5G6C.json │ │ │ │ │ └── 0153RHRSFPJTHUKBK7AFEJO3WOAEA3RC7D.json │ │ │ ├── sharepoint6 │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ └── book-war-and-peace-1p.txt │ │ │ │ └── file_data │ │ │ │ │ └── 0153RHRSD4LQFV3VWQRZCY3E724WTG5G6C.json │ │ │ ├── singlestore │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── cars-1-5fb93ce5.csv │ │ │ │ │ ├── cars-10-5fb93ce5.csv │ │ │ │ │ ├── cars-2-5fb93ce5.csv │ │ │ │ │ ├── cars-3-5fb93ce5.csv │ │ │ │ │ ├── cars-4-5fb93ce5.csv │ │ │ │ │ ├── cars-5-5fb93ce5.csv │ │ │ │ │ ├── cars-6-5fb93ce5.csv │ │ │ │ │ ├── cars-7-5fb93ce5.csv │ │ │ │ │ ├── cars-8-5fb93ce5.csv │ │ │ │ │ └── cars-9-5fb93ce5.csv │ │ │ │ ├── file_data │ │ │ │ │ ├── 1c704234-e0da-5676-be32-a12cc6407b3d.json │ │ │ │ │ ├── 324354db-0465-56ca-a023-072481f5dd03.json │ │ │ │ │ ├── cars-1-5fb93ce5.json │ │ │ │ │ ├── cars-10-5fb93ce5.json │ │ │ │ │ ├── cars-2-5fb93ce5.json │ │ │ │ │ ├── cars-3-5fb93ce5.json │ │ │ │ │ ├── cars-4-5fb93ce5.json │ │ │ │ │ ├── cars-5-5fb93ce5.json │ │ │ │ │ ├── cars-6-5fb93ce5.json │ │ │ │ │ ├── cars-7-5fb93ce5.json │ │ │ │ │ ├── cars-8-5fb93ce5.json │ │ │ │ │ └── cars-9-5fb93ce5.json │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── snowflake │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── cars-1-a2ccd8ea.csv │ │ │ │ │ ├── cars-10-a2ccd8ea.csv │ │ │ │ │ ├── cars-11-a2ccd8ea.csv │ │ │ │ │ ├── cars-12-a2ccd8ea.csv │ │ │ │ │ ├── cars-13-a2ccd8ea.csv │ │ │ │ │ ├── cars-14-a2ccd8ea.csv │ │ │ │ │ ├── cars-15-a2ccd8ea.csv │ │ │ │ │ ├── cars-16-a2ccd8ea.csv │ │ │ │ │ ├── cars-17-a2ccd8ea.csv │ │ │ │ │ ├── cars-18-a2ccd8ea.csv │ │ │ │ │ ├── cars-19-a2ccd8ea.csv │ │ │ │ │ ├── cars-2-a2ccd8ea.csv │ │ │ │ │ ├── cars-20-a2ccd8ea.csv │ │ │ │ │ ├── cars-3-a2ccd8ea.csv │ │ │ │ │ ├── cars-4-a2ccd8ea.csv │ │ │ │ │ ├── cars-5-a2ccd8ea.csv │ │ │ │ │ ├── cars-6-a2ccd8ea.csv │ │ │ │ │ ├── cars-7-a2ccd8ea.csv │ │ │ │ │ ├── cars-8-a2ccd8ea.csv │ │ │ │ │ └── cars-9-a2ccd8ea.csv │ │ │ │ ├── file_data │ │ │ │ │ ├── cars-1-a2ccd8ea.json │ │ │ │ │ ├── cars-10-a2ccd8ea.json │ │ │ │ │ ├── cars-11-a2ccd8ea.json │ │ │ │ │ ├── cars-12-a2ccd8ea.json │ │ │ │ │ ├── cars-13-a2ccd8ea.json │ │ │ │ │ ├── cars-14-a2ccd8ea.json │ │ │ │ │ ├── cars-15-a2ccd8ea.json │ │ │ │ │ ├── cars-16-a2ccd8ea.json │ │ │ │ │ ├── cars-17-a2ccd8ea.json │ │ │ │ │ ├── cars-18-a2ccd8ea.json │ │ │ │ │ ├── cars-19-a2ccd8ea.json │ │ │ │ │ ├── cars-2-a2ccd8ea.json │ │ │ │ │ ├── cars-20-a2ccd8ea.json │ │ │ │ │ ├── cars-3-a2ccd8ea.json │ │ │ │ │ ├── cars-4-a2ccd8ea.json │ │ │ │ │ ├── cars-5-a2ccd8ea.json │ │ │ │ │ ├── cars-6-a2ccd8ea.json │ │ │ │ │ ├── cars-7-a2ccd8ea.json │ │ │ │ │ ├── cars-8-a2ccd8ea.json │ │ │ │ │ └── cars-9-a2ccd8ea.json │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── sqlite │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ ├── cars-1-5fb93ce5.csv │ │ │ │ │ ├── cars-10-5fb93ce5.csv │ │ │ │ │ ├── cars-2-5fb93ce5.csv │ │ │ │ │ ├── cars-3-5fb93ce5.csv │ │ │ │ │ ├── cars-4-5fb93ce5.csv │ │ │ │ │ ├── cars-5-5fb93ce5.csv │ │ │ │ │ ├── cars-6-5fb93ce5.csv │ │ │ │ │ ├── cars-7-5fb93ce5.csv │ │ │ │ │ ├── cars-8-5fb93ce5.csv │ │ │ │ │ └── cars-9-5fb93ce5.csv │ │ │ │ ├── file_data │ │ │ │ │ ├── 1c704234-e0da-5676-be32-a12cc6407b3d.json │ │ │ │ │ ├── 324354db-0465-56ca-a023-072481f5dd03.json │ │ │ │ │ ├── cars-1-5fb93ce5.json │ │ │ │ │ ├── cars-10-5fb93ce5.json │ │ │ │ │ ├── cars-2-5fb93ce5.json │ │ │ │ │ ├── cars-3-5fb93ce5.json │ │ │ │ │ ├── cars-4-5fb93ce5.json │ │ │ │ │ ├── cars-5-5fb93ce5.json │ │ │ │ │ ├── cars-6-5fb93ce5.json │ │ │ │ │ ├── cars-7-5fb93ce5.json │ │ │ │ │ ├── cars-8-5fb93ce5.json │ │ │ │ │ └── cars-9-5fb93ce5.json │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── stager │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── stager_flatten_metadata │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── vastdb │ │ │ │ └── stager │ │ │ │ │ ├── DA-1p-with-duplicate-pages.pdf.json │ │ │ │ │ └── DA-1p-with-duplicate-pages.pdf.ndjson │ │ │ ├── zendesk-articles │ │ │ │ ├── directory_structure.json │ │ │ │ ├── downloads │ │ │ │ │ └── articles │ │ │ │ │ │ ├── 34257293234331.html │ │ │ │ │ │ ├── 34257323898267.html │ │ │ │ │ │ ├── 34257323910555.html │ │ │ │ │ │ ├── 34257323933467.html │ │ │ │ │ │ ├── 34257323948187.html │ │ │ │ │ │ ├── 34257323962779.html │ │ │ │ │ │ ├── 34257441305499.html │ │ │ │ │ │ └── 34257467168795.html │ │ │ │ └── file_data │ │ │ │ │ ├── 34257293234331.json │ │ │ │ │ ├── 34257323898267.json │ │ │ │ │ ├── 34257323910555.json │ │ │ │ │ ├── 34257323933467.json │ │ │ │ │ ├── 34257323948187.json │ │ │ │ │ ├── 34257323962779.json │ │ │ │ │ ├── 34257441305499.json │ │ │ │ │ └── 34257467168795.json │ │ │ └── zendesk-tickets │ │ │ │ ├── directory_structure.json │ │ │ │ └── downloads │ │ │ │ └── tickets │ │ │ │ ├── 1.txt │ │ │ │ ├── 2.txt │ │ │ │ ├── 3.txt │ │ │ │ ├── 4.txt │ │ │ │ ├── 5.txt │ │ │ │ ├── 7.txt │ │ │ │ └── 8.txt │ │ ├── kafka │ │ │ ├── conftest.py │ │ │ ├── test_kafka_cloud.py │ │ │ └── test_kafka_local.py │ │ ├── sql │ │ │ ├── __init__.py │ │ │ ├── test_databricks_delta_tables.py │ │ │ ├── test_postgres.py │ │ │ ├── test_singlestore.py │ │ │ ├── test_snowflake.py │ │ │ ├── test_sqlite.py │ │ │ └── test_vastdb.py │ │ ├── test_astradb.py │ │ ├── test_azure_ai_search.py │ │ ├── test_chroma.py │ │ ├── test_confluence.py │ │ ├── test_delta_table.py │ │ ├── test_dropbox.py │ │ ├── test_github.py │ │ ├── test_google_drive.py │ │ ├── test_jira.py │ │ ├── test_lancedb.py │ │ ├── test_milvus.py │ │ ├── test_mongodb.py │ │ ├── test_neo4j.py │ │ ├── test_notion.py │ │ ├── test_onedrive.py │ │ ├── test_pinecone.py │ │ ├── test_qdrant.py │ │ ├── test_redis.py │ │ ├── test_s3.py │ │ ├── test_sharepoint.py │ │ ├── test_vectara.py │ │ ├── test_zendesk.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ ├── docker.py │ │ │ ├── docker_compose.py │ │ │ └── validation │ │ │ │ ├── __init__.py │ │ │ │ ├── destination.py │ │ │ │ ├── equality.py │ │ │ │ ├── source.py │ │ │ │ └── utils.py │ │ └── weaviate │ │ │ ├── __init__.py │ │ │ ├── assets │ │ │ └── elements.json │ │ │ ├── conftest.py │ │ │ ├── test_cloud.py │ │ │ └── test_local.py │ ├── embedders │ │ ├── __init__.py │ │ ├── assets │ │ │ └── DA-1p-with-duplicate-pages.pdf.json │ │ ├── conftest.py │ │ ├── test_azure_openai.py │ │ ├── test_bedrock.py │ │ ├── test_huggingface.py │ │ ├── test_mixedbread.py │ │ ├── test_octoai.py │ │ ├── test_openai.py │ │ ├── test_togetherai.py │ │ ├── test_vertexai.py │ │ ├── test_voyageai.py │ │ └── utils.py │ ├── partitioners │ │ ├── __init__.py │ │ ├── assets │ │ │ ├── DA-1p-with-duplicate-pages.pdf │ │ │ ├── README.md │ │ │ ├── README.rst │ │ │ ├── alert.eml │ │ │ ├── announcement.eml │ │ │ ├── embedded-images-tables.jpg │ │ │ ├── embedded-images-tables.pdf │ │ │ ├── english-and-korean.png │ │ │ ├── fake-doc.rtf │ │ │ ├── fake-email-attachment.eml │ │ │ ├── fake-email-image-embedded.eml │ │ │ ├── fake-email.eml │ │ │ ├── fake-email.msg │ │ │ ├── fake-html.html │ │ │ ├── fake-power-point.ppt │ │ │ ├── fake-power-point.pptx │ │ │ ├── fake-text-utf-32.txt │ │ │ ├── fake-text.txt │ │ │ ├── fake-xml.xml │ │ │ ├── fake.doc │ │ │ ├── fake.docx │ │ │ ├── fake.odt │ │ │ ├── family-day.eml │ │ │ ├── layout-parser-paper-fast.jpg │ │ │ ├── layout-parser-paper-fast.pdf │ │ │ ├── layout-parser-paper-with-table.jpg │ │ │ ├── layout-parser-paper.pdf │ │ │ ├── layout-parser-paper.pdf.gz │ │ │ ├── list-item-example.pdf │ │ │ ├── multi_page_image.tif │ │ │ ├── spring-weather.html.json │ │ │ ├── stanley-cups.csv │ │ │ ├── stanley-cups.tsv │ │ │ ├── stanley-cups.xlsx │ │ │ └── winter-sports.epub │ │ └── test_partitioner.py │ └── utils.py └── unit │ ├── __init__.py │ ├── chunkers │ ├── __init__.py │ └── test_chunkers.py │ ├── cli │ └── test_utils.py │ ├── connectors │ ├── __init__.py │ ├── ibm_watsonx │ │ ├── __init__.py │ │ └── test_ibm_watsonx_s3.py │ ├── motherduck │ │ ├── __init__.py │ │ └── test_base.py │ ├── notion │ │ ├── __init__.py │ │ └── types │ │ │ ├── __init__.py │ │ │ ├── database_properties │ │ │ ├── __init__.py │ │ │ └── test__init__.py │ │ │ └── test_user.py │ ├── sql │ │ ├── __init__.py │ │ ├── test_snowflake.py │ │ └── test_sql.py │ ├── test_confluence.py │ ├── test_json_schema.py │ ├── test_neo4j.py │ ├── test_sharepoint.py │ └── weaviate │ │ └── test_weaviate.py │ ├── embed │ ├── __init__.py │ ├── test_mixedbreadai.py │ ├── test_octoai.py │ ├── test_openai.py │ ├── test_vertexai.py │ └── test_voyageai.py │ ├── embedders │ ├── __init__.py │ ├── test_bedrock.py │ ├── test_huggingface.py │ ├── test_mixedbread.py │ ├── test_octoai.py │ ├── test_openai.py │ ├── test_togetherai.py │ ├── test_vertexai.py │ └── test_voyageai.py │ ├── partitioners │ ├── __init__.py │ └── test_partitioner.py │ ├── processes │ └── connectors │ │ ├── test_astradb.py │ │ └── test_pinecone.py │ ├── test_error.py │ ├── test_html.py │ ├── test_interfaces.py │ ├── test_utils.py │ ├── unstructured │ └── test_chunking_utils.py │ └── utils │ ├── __init__.py │ └── data_generator.py ├── test_e2e ├── .gitignore ├── check-diff-expected-output.py ├── check-num-dirs-output.sh ├── check-num-files-expected-output.sh ├── check-num-files-output.sh ├── clean-permissions-files.sh ├── cleanup.sh ├── dest │ ├── azure.sh │ ├── box.sh │ ├── couchbase.sh │ ├── gcs.sh │ ├── kdbai.sh │ ├── s3.sh │ └── sharepoint-embed-cog-index.sh ├── docker-compose.otel.yml ├── env_setup │ ├── airtable │ │ ├── check-num-rows-and-columns-output.sh │ │ ├── component_ids.sh │ │ ├── create_scale_test_components.py │ │ ├── create_scale_test_components.sh │ │ └── print_num_rows_df.py │ ├── couchbase │ │ ├── common │ │ │ ├── check_cluster_health.py │ │ │ ├── constants.env │ │ │ ├── docker-compose.yaml │ │ │ └── setup_couchbase_cluster.sh │ │ ├── destination_connector │ │ │ └── ingest_destination_setup_cluster.py │ │ └── source_connector │ │ │ ├── airline_sample.jsonlines │ │ │ └── ingest_source_setup_cluster.py │ ├── kafka │ │ ├── create-kafka-instance.sh │ │ └── docker-compose.yml │ ├── kdbai │ │ ├── .gitignore │ │ ├── docker-compose.yml │ │ ├── k4.lic.gpg │ │ ├── provision.py │ │ ├── provision.sh │ │ └── test_output.py │ └── sftp │ │ ├── create-and-check-sftp.sh │ │ ├── docker-compose.yaml │ │ └── folder1 │ │ ├── folder2 │ │ └── wiki_movie_plots_small2.csv │ │ └── wiki_movie_plots_small.csv ├── example-docs │ ├── layout-parser-paper-with-table.jpg │ └── layout-parser-paper.pdf ├── expected-structured-output │ ├── Sharepoint-with-permissions │ │ ├── Document.docx.json │ │ ├── SitePages │ │ │ ├── Home.aspx.json │ │ │ └── This-is-a-title.aspx.json │ │ ├── fake-text.txt.json │ │ ├── ideas-page.html.json │ │ ├── nested │ │ │ ├── 2023-Jan-economic-outlook.pdf.json │ │ │ └── page-with-formula.pdf.json │ │ ├── permissions-fake-text.docx.json │ │ └── stanley-cups.xlsx.json │ ├── Sharepoint │ │ ├── Document.docx.json │ │ ├── SitePages │ │ │ ├── Home.aspx.json │ │ │ └── This-is-a-title.aspx.json │ │ ├── fake-text.txt.json │ │ ├── ideas-page.html.json │ │ ├── nested │ │ │ ├── 2023-Jan-economic-outlook.pdf.json │ │ │ └── page-with-formula.pdf.json │ │ ├── permissions-fake-text.docx.json │ │ └── stanley-cups.xlsx.json │ ├── airtable-diff │ │ ├── app5YQxSfp220fWtm │ │ │ ├── tblBoUk54tWXGqYai.csv.json │ │ │ └── tblxdPc7L2meGIZLE.csv.json │ │ └── appJ43QmP8I17zu88 │ │ │ ├── tblbj2vBlL2dN2xqq.csv.json │ │ │ └── tblfu7DzEcCWNKwP4.csv.json │ ├── astradb │ │ ├── 25b75f1d-a2ea-4c97-b75f-1da2eadc97f7.csv.json │ │ ├── 60297eea-73d7-4fca-a97e-ea73d7cfca62.csv.json │ │ ├── 641d99e3-9941-4c18-9d99-e399414c183d.csv.json │ │ ├── 762c0093-2277-4f3e-ac00-932277af3e0e.csv.json │ │ └── ae40df94-0b3a-4f89-80df-940b3a6f8966.csv.json │ ├── azure │ │ ├── Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json │ │ ├── IRS-form-1987.pdf.json │ │ ├── IRS-form-1987.png.json │ │ ├── rfc854.txt.json │ │ └── spring-weather.html.json │ ├── biomed-api │ │ ├── 65 │ │ │ └── 11 │ │ │ │ └── main.PMC6312790.pdf.json │ │ └── 75 │ │ │ └── 29 │ │ │ └── main.PMC6312793.pdf.json │ ├── biomed-path │ │ └── 07 │ │ │ └── 07 │ │ │ └── sbaa031.073.PMC7234218.pdf.json │ ├── box │ │ ├── handbook-1p.docx.json │ │ ├── nested-1 │ │ │ ├── ideas-page.html.json │ │ │ └── nested-2 │ │ │ │ └── ideas-page.html.json │ │ └── science-exploration-1p.pptx.json │ ├── couchbase │ │ ├── unstructured-10748.txt.json │ │ ├── unstructured-1543.txt.json │ │ ├── unstructured-16264.txt.json │ │ ├── unstructured-1739.txt.json │ │ └── unstructured-3494.txt.json │ ├── delta-table │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-0.json │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-1.json │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-2.json │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-3.json │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-4.json │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-5.json │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-6.json │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-7.json │ │ ├── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-8.json │ │ └── 0-9d594ee0-ad36-4e7e-a6be-f53975fe3d10-9.json │ ├── discord │ │ ├── 1099442333440802930.txt.json │ │ └── 1099601456321003600.txt.json │ ├── dropbox │ │ ├── handbook-1p.docx.json │ │ ├── nested-1 │ │ │ └── ideas-page.html.json │ │ ├── nested-2 │ │ │ └── ideas-page.html.json │ │ └── science-exploration-1p.pptx.json │ ├── elasticsearch │ │ ├── movies-0-57554198.json │ │ ├── movies-1-57554198.json │ │ ├── movies-2-57554198.json │ │ ├── movies-3-57554198.json │ │ ├── movies-4-57554198.json │ │ ├── movies-5-57554198.json │ │ ├── movies-6-57554198.json │ │ ├── movies-7-57554198.json │ │ ├── movies-8-57554198.json │ │ └── movies-9-57554198.json │ ├── gcs │ │ ├── ideas-page.html.json │ │ ├── nested-1 │ │ │ ├── fake-text.txt.json │ │ │ └── nested │ │ │ │ └── ideas-page.html.json │ │ └── nested-2 │ │ │ ├── fake-text.txt.json │ │ │ ├── nested │ │ │ └── ideas-page.html.json │ │ │ └── stanley-cups.xlsx.json │ ├── github │ │ ├── LICENSE.txt.json │ │ └── test.html.json │ ├── gitlab │ │ └── docs │ │ │ └── _index.md.json │ ├── google-drive │ │ ├── fake.docx.json │ │ ├── foo.txt.json │ │ ├── google_workspace_native_doc.json │ │ ├── google_workspace_native_sheet.json │ │ ├── heavy_gif_rich_google_workspace_native_slides.json │ │ └── test-drive-doc.docx.json │ ├── hubspot │ │ ├── calls │ │ │ ├── 41286477879.json │ │ │ └── 41287834137.json │ │ ├── communications │ │ │ ├── 41286064390.json │ │ │ └── 41286467154.json │ │ ├── emails │ │ │ └── 41287832127.json │ │ ├── notes │ │ │ └── 41287833179.json │ │ ├── products │ │ │ ├── 2362691415.json │ │ │ ├── 2362691416.json │ │ │ └── 2362691417.json │ │ └── tickets │ │ │ ├── 1976928828.json │ │ │ ├── 2002294392.json │ │ │ └── 2002301007.json │ ├── onedrive │ │ └── utic-test-ingest-fixtures │ │ │ ├── fake-text.txt.json │ │ │ ├── nested │ │ │ └── fake-text.txt.json │ │ │ └── tests-example.xls.json │ ├── outlook │ │ ├── 21be155fb0c95885.eml.json │ │ ├── 497eba8c81c801c6.eml.json │ │ └── 4a16a411f162ebbb.eml.json │ ├── pdf-fast-reprocess │ │ ├── azure │ │ │ ├── Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json │ │ │ └── IRS-form-1987.pdf.json │ │ ├── biomed-api │ │ │ ├── 65 │ │ │ │ └── 11 │ │ │ │ │ └── main.PMC6312790.pdf.json │ │ │ └── 75 │ │ │ │ └── 29 │ │ │ │ └── main.PMC6312793.pdf.json │ │ ├── biomed-path │ │ │ └── 07 │ │ │ │ └── 07 │ │ │ │ └── sbaa031.073.PMC7234218.pdf.json │ │ ├── s3-filter │ │ │ ├── page-with-formula.pdf.json │ │ │ └── recalibrating-risk-report.pdf.json │ │ └── s3 │ │ │ ├── 2023-Jan-economic-outlook.pdf.json │ │ │ ├── Silent-Giant-(1).pdf.json │ │ │ ├── page-with-formula.pdf.json │ │ │ └── recalibrating-risk-report.pdf.json │ ├── s3 │ │ ├── 2023-Jan-economic-outlook.pdf.json │ │ ├── Silent-Giant-(1).pdf.json │ │ ├── page-with-formula.pdf.json │ │ └── recalibrating-risk-report.pdf.json │ ├── salesforce │ │ ├── Campaign │ │ │ ├── 701Hu000001eX9EIAU.xml.json │ │ │ ├── 701Hu000001eX9FIAU.xml.json │ │ │ ├── 701Hu000001eX9GIAU.xml.json │ │ │ └── 701Hu000001eX9HIAU.xml.json │ │ └── EmailMessage │ │ │ ├── 02sHu00001efErPIAU.eml.json │ │ │ └── 02sHu00001efErQIAU.eml.json │ ├── sftp │ │ └── folder1 │ │ │ ├── folder2 │ │ │ └── wiki_movie_plots_small2.csv.json │ │ │ └── wiki_movie_plots_small.csv.json │ └── slack │ │ └── cce82000be81b105.xml.json ├── failed-partition-docs │ ├── sample.gif │ └── small.txt ├── files │ └── azure_ai_index_schema.json ├── otel_assets │ └── otel-collector-config.yaml ├── python │ ├── test-azure-output.py │ ├── test-gcs-output.py │ └── test-ingest-couchbase-output.py ├── reference_test_scripts │ ├── README.md │ └── pinecone.sh ├── src │ ├── against-api.sh │ ├── airtable-diff.sh │ ├── airtable-large.sh │ ├── azure.sh │ ├── box.sh │ ├── couchbase.sh │ ├── gcs.sh │ ├── gitlab.sh │ ├── google-drive.sh │ ├── outlook.sh │ ├── pdf-fast-reprocess.sh │ ├── s3-compression.sh │ ├── s3-filter.sh │ ├── s3-sample.sh │ ├── s3.sh │ ├── salesforce.sh │ ├── sftp.sh │ └── slack.sh ├── test-dest.sh ├── test-help.py └── test-src.sh ├── unstructured_ingest ├── __init__.py ├── __version__.py ├── cli │ ├── README.md │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── cmd.py │ │ ├── dest.py │ │ ├── importer.py │ │ └── src.py │ ├── cli.py │ ├── cmds.py │ └── utils │ │ ├── __init__.py │ │ ├── click.py │ │ └── model_conversion.py ├── data_types │ ├── __init__.py │ ├── entities.py │ └── file_data.py ├── embed │ ├── __init__.py │ ├── azure_openai.py │ ├── bedrock.py │ ├── huggingface.py │ ├── interfaces.py │ ├── mixedbreadai.py │ ├── octoai.py │ ├── openai.py │ ├── togetherai.py │ ├── vertexai.py │ └── voyageai.py ├── error.py ├── errors_v2.py ├── interfaces │ ├── __init__.py │ ├── connector.py │ ├── downloader.py │ ├── indexer.py │ ├── process.py │ ├── processor.py │ ├── upload_stager.py │ └── uploader.py ├── logger.py ├── main.py ├── otel.py ├── pipeline │ ├── __init__.py │ ├── interfaces.py │ ├── otel.py │ ├── pipeline.py │ └── steps │ │ ├── __init__.py │ │ ├── chunk.py │ │ ├── download.py │ │ ├── embed.py │ │ ├── filter.py │ │ ├── index.py │ │ ├── partition.py │ │ ├── stage.py │ │ ├── uncompress.py │ │ └── upload.py ├── processes │ ├── __init__.py │ ├── chunker.py │ ├── connector_registry.py │ ├── connectors │ │ ├── __init__.py │ │ ├── airtable.py │ │ ├── assets │ │ │ ├── __init__.py │ │ │ ├── databricks_delta_table_schema.sql │ │ │ └── weaviate_collection_config.json │ │ ├── astradb.py │ │ ├── azure_ai_search.py │ │ ├── chroma.py │ │ ├── confluence.py │ │ ├── couchbase.py │ │ ├── databricks │ │ │ ├── __init__.py │ │ │ ├── volumes.py │ │ │ ├── volumes_aws.py │ │ │ ├── volumes_azure.py │ │ │ ├── volumes_gcp.py │ │ │ ├── volumes_native.py │ │ │ └── volumes_table.py │ │ ├── delta_table.py │ │ ├── discord.py │ │ ├── duckdb │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── duckdb.py │ │ │ └── motherduck.py │ │ ├── elasticsearch │ │ │ ├── __init__.py │ │ │ ├── elasticsearch.py │ │ │ └── opensearch.py │ │ ├── fsspec │ │ │ ├── __init__.py │ │ │ ├── azure.py │ │ │ ├── box.py │ │ │ ├── dropbox.py │ │ │ ├── fsspec.py │ │ │ ├── gcs.py │ │ │ ├── s3.py │ │ │ ├── sftp.py │ │ │ └── utils.py │ │ ├── github.py │ │ ├── gitlab.py │ │ ├── google_drive.py │ │ ├── ibm_watsonx │ │ │ ├── __init__.py │ │ │ └── ibm_watsonx_s3.py │ │ ├── jira.py │ │ ├── kafka │ │ │ ├── __init__.py │ │ │ ├── cloud.py │ │ │ ├── kafka.py │ │ │ └── local.py │ │ ├── kdbai.py │ │ ├── lancedb │ │ │ ├── __init__.py │ │ │ ├── aws.py │ │ │ ├── azure.py │ │ │ ├── cloud.py │ │ │ ├── gcp.py │ │ │ ├── lancedb.py │ │ │ └── local.py │ │ ├── local.py │ │ ├── milvus.py │ │ ├── mongodb.py │ │ ├── neo4j.py │ │ ├── notion │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ ├── connector.py │ │ │ ├── helpers.py │ │ │ ├── ingest_backoff │ │ │ │ ├── __init__.py │ │ │ │ ├── _common.py │ │ │ │ ├── _wrapper.py │ │ │ │ └── types.py │ │ │ ├── interfaces.py │ │ │ └── types │ │ │ │ ├── __init__.py │ │ │ │ ├── block.py │ │ │ │ ├── blocks │ │ │ │ ├── __init__.py │ │ │ │ ├── bookmark.py │ │ │ │ ├── breadcrumb.py │ │ │ │ ├── bulleted_list_item.py │ │ │ │ ├── callout.py │ │ │ │ ├── child_database.py │ │ │ │ ├── child_page.py │ │ │ │ ├── code.py │ │ │ │ ├── column_list.py │ │ │ │ ├── divider.py │ │ │ │ ├── embed.py │ │ │ │ ├── equation.py │ │ │ │ ├── file.py │ │ │ │ ├── heading.py │ │ │ │ ├── image.py │ │ │ │ ├── link_preview.py │ │ │ │ ├── link_to_page.py │ │ │ │ ├── numbered_list.py │ │ │ │ ├── paragraph.py │ │ │ │ ├── pdf.py │ │ │ │ ├── quote.py │ │ │ │ ├── synced_block.py │ │ │ │ ├── table.py │ │ │ │ ├── table_of_contents.py │ │ │ │ ├── template.py │ │ │ │ ├── todo.py │ │ │ │ ├── toggle.py │ │ │ │ ├── unsupported.py │ │ │ │ └── video.py │ │ │ │ ├── database.py │ │ │ │ ├── database_properties │ │ │ │ ├── __init__.py │ │ │ │ ├── checkbox.py │ │ │ │ ├── created_by.py │ │ │ │ ├── created_time.py │ │ │ │ ├── date.py │ │ │ │ ├── email.py │ │ │ │ ├── files.py │ │ │ │ ├── formula.py │ │ │ │ ├── last_edited_by.py │ │ │ │ ├── last_edited_time.py │ │ │ │ ├── multiselect.py │ │ │ │ ├── number.py │ │ │ │ ├── people.py │ │ │ │ ├── phone_number.py │ │ │ │ ├── relation.py │ │ │ │ ├── rich_text.py │ │ │ │ ├── rollup.py │ │ │ │ ├── select.py │ │ │ │ ├── status.py │ │ │ │ ├── title.py │ │ │ │ ├── unique_id.py │ │ │ │ ├── url.py │ │ │ │ └── verification.py │ │ │ │ ├── date.py │ │ │ │ ├── file.py │ │ │ │ ├── page.py │ │ │ │ ├── parent.py │ │ │ │ ├── rich_text.py │ │ │ │ └── user.py │ │ ├── onedrive.py │ │ ├── outlook.py │ │ ├── pinecone.py │ │ ├── qdrant │ │ │ ├── __init__.py │ │ │ ├── cloud.py │ │ │ ├── local.py │ │ │ ├── qdrant.py │ │ │ └── server.py │ │ ├── redisdb.py │ │ ├── salesforce.py │ │ ├── sharepoint.py │ │ ├── slack.py │ │ ├── sql │ │ │ ├── __init__.py │ │ │ ├── databricks_delta_tables.py │ │ │ ├── postgres.py │ │ │ ├── singlestore.py │ │ │ ├── snowflake.py │ │ │ ├── sql.py │ │ │ ├── sqlite.py │ │ │ └── vastdb.py │ │ ├── utils.py │ │ ├── vectara.py │ │ ├── weaviate │ │ │ ├── __init__.py │ │ │ ├── cloud.py │ │ │ ├── embedded.py │ │ │ ├── local.py │ │ │ └── weaviate.py │ │ └── zendesk │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── zendesk.py │ ├── embedder.py │ ├── filter.py │ ├── partitioner.py │ ├── uncompress.py │ └── utils │ │ ├── __init__.py │ │ ├── blob_storage.py │ │ └── logging │ │ ├── connector.py │ │ └── sanitizer.py ├── unstructured_api.py └── utils │ ├── __init__.py │ ├── chunking.py │ ├── compression.py │ ├── constants.py │ ├── data_prep.py │ ├── dep_check.py │ ├── filesystem.py │ ├── html.py │ ├── ndjson.py │ ├── pydantic_models.py │ ├── string_and_date_utils.py │ ├── table.py │ └── tls.py └── uv.lock /.github/actions/generate-cache-key/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/actions/generate-cache-key/action.yml -------------------------------------------------------------------------------- /.github/actions/setup-environment/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/actions/setup-environment/action.yml -------------------------------------------------------------------------------- /.github/actions/setup-python/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/actions/setup-python/action.yml -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/workflows/claude.yml -------------------------------------------------------------------------------- /.github/workflows/codeflash.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/workflows/codeflash.yml -------------------------------------------------------------------------------- /.github/workflows/e2e.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/workflows/e2e.yml -------------------------------------------------------------------------------- /.github/workflows/ingest-test-fixtures-update-pr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/workflows/ingest-test-fixtures-update-pr.yml -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/workflows/release.yml -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.github/workflows/unit_tests.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/.gitignore -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/LICENSE.md -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/README.md -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/assets/pipeline.excalidraw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/docs/assets/pipeline.excalidraw -------------------------------------------------------------------------------- /docs/assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/docs/assets/pipeline.png -------------------------------------------------------------------------------- /docs/assets/sequence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/docs/assets/sequence.png -------------------------------------------------------------------------------- /docs/assets/sequence.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/docs/assets/sequence.txt -------------------------------------------------------------------------------- /docs/assets/unstructured_ingest_cli_pipeline_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/docs/assets/unstructured_ingest_cli_pipeline_diagram.png -------------------------------------------------------------------------------- /docs/connector_development.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/docs/connector_development.md -------------------------------------------------------------------------------- /example-docs/2023-half-year-analyses-by-segment.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/2023-half-year-analyses-by-segment.xlsx -------------------------------------------------------------------------------- /example-docs/CantinaBand3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/CantinaBand3.wav -------------------------------------------------------------------------------- /example-docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/README.md -------------------------------------------------------------------------------- /example-docs/README.org: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/README.org -------------------------------------------------------------------------------- /example-docs/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/README.rst -------------------------------------------------------------------------------- /example-docs/book-war-and-peace-1225p.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/book-war-and-peace-1225p.txt -------------------------------------------------------------------------------- /example-docs/book-war-and-peace-1p.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/book-war-and-peace-1p.txt -------------------------------------------------------------------------------- /example-docs/category-level.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/category-level.docx -------------------------------------------------------------------------------- /example-docs/contains-pictures.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/contains-pictures.docx -------------------------------------------------------------------------------- /example-docs/csv-with-escaped-commas.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/csv-with-escaped-commas.csv -------------------------------------------------------------------------------- /example-docs/csv-with-long-lines.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/csv-with-long-lines.csv -------------------------------------------------------------------------------- /example-docs/docx-hdrftr.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/docx-hdrftr.docx -------------------------------------------------------------------------------- /example-docs/docx-shapes.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/docx-shapes.docx -------------------------------------------------------------------------------- /example-docs/docx-tables.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/docx-tables.docx -------------------------------------------------------------------------------- /example-docs/duplicate-paragraphs.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/duplicate-paragraphs.doc -------------------------------------------------------------------------------- /example-docs/duplicate-paragraphs.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/duplicate-paragraphs.docx -------------------------------------------------------------------------------- /example-docs/eml/email-equals-attachment-filename.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-equals-attachment-filename.eml -------------------------------------------------------------------------------- /example-docs/eml/email-inline-content-disposition.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-inline-content-disposition.eml -------------------------------------------------------------------------------- /example-docs/eml/email-no-html-content-1.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-no-html-content-1.eml -------------------------------------------------------------------------------- /example-docs/eml/email-no-utf8-2008-07-16.062410.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-no-utf8-2008-07-16.062410.eml -------------------------------------------------------------------------------- /example-docs/eml/email-no-utf8-2014-03-17.111517.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-no-utf8-2014-03-17.111517.eml -------------------------------------------------------------------------------- /example-docs/eml/email-replace-mime-encodings-error-1.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-replace-mime-encodings-error-1.eml -------------------------------------------------------------------------------- /example-docs/eml/email-replace-mime-encodings-error-2.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-replace-mime-encodings-error-2.eml -------------------------------------------------------------------------------- /example-docs/eml/email-replace-mime-encodings-error-3.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-replace-mime-encodings-error-3.eml -------------------------------------------------------------------------------- /example-docs/eml/email-replace-mime-encodings-error-4.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-replace-mime-encodings-error-4.eml -------------------------------------------------------------------------------- /example-docs/eml/email-replace-mime-encodings-error-5.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-replace-mime-encodings-error-5.eml -------------------------------------------------------------------------------- /example-docs/eml/email-with-image.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/email-with-image.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email-attachment.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email-attachment.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email-b64.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email-b64.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email-header.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email-header.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email-image-embedded.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email-image-embedded.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email-malformed-encoding.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email-malformed-encoding.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email-utf-16-be.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email-utf-16-be.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email-utf-16-le.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email-utf-16-le.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email-utf-16.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email-utf-16.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email.eml -------------------------------------------------------------------------------- /example-docs/eml/fake-email.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-email.txt -------------------------------------------------------------------------------- /example-docs/eml/fake-encrypted.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/fake-encrypted.eml -------------------------------------------------------------------------------- /example-docs/eml/signed-doc.p7s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/eml/signed-doc.p7s -------------------------------------------------------------------------------- /example-docs/emoji.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/emoji.xlsx -------------------------------------------------------------------------------- /example-docs/empty.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /example-docs/example-10k-1p.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/example-10k-1p.html -------------------------------------------------------------------------------- /example-docs/example-10k-230p.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/example-10k-230p.html -------------------------------------------------------------------------------- /example-docs/example-10k-utf-16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/example-10k-utf-16.html -------------------------------------------------------------------------------- /example-docs/example-10k.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/example-10k.html -------------------------------------------------------------------------------- /example-docs/example-list-items-multiple.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/example-list-items-multiple.docx -------------------------------------------------------------------------------- /example-docs/example-steelJIS-datasheet-utf-16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/example-steelJIS-datasheet-utf-16.html -------------------------------------------------------------------------------- /example-docs/example-steelJIS-datasheet.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/example-steelJIS-datasheet.html -------------------------------------------------------------------------------- /example-docs/example-with-scripts.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/example-with-scripts.html -------------------------------------------------------------------------------- /example-docs/factbook-utf-16.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/factbook-utf-16.xml -------------------------------------------------------------------------------- /example-docs/factbook.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/factbook.xml -------------------------------------------------------------------------------- /example-docs/fake-doc-emphasized-text.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-doc-emphasized-text.doc -------------------------------------------------------------------------------- /example-docs/fake-doc-emphasized-text.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-doc-emphasized-text.docx -------------------------------------------------------------------------------- /example-docs/fake-doc.rtf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-doc.rtf -------------------------------------------------------------------------------- /example-docs/fake-email-attachment.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-email-attachment.msg -------------------------------------------------------------------------------- /example-docs/fake-email-multiple-attachments.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-email-multiple-attachments.msg -------------------------------------------------------------------------------- /example-docs/fake-email.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-email.eml -------------------------------------------------------------------------------- /example-docs/fake-email.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-email.msg -------------------------------------------------------------------------------- /example-docs/fake-email.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-email.txt -------------------------------------------------------------------------------- /example-docs/fake-encrypted.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-encrypted.msg -------------------------------------------------------------------------------- /example-docs/fake-html-cp1252.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-html-cp1252.html -------------------------------------------------------------------------------- /example-docs/fake-html-lang-de.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-html-lang-de.html -------------------------------------------------------------------------------- /example-docs/fake-html-pre.htm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-html-pre.htm -------------------------------------------------------------------------------- /example-docs/fake-html-with-duplicate-elements.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-html-with-duplicate-elements.html -------------------------------------------------------------------------------- /example-docs/fake-html-with-footer-and-header.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-html-with-footer-and-header.html -------------------------------------------------------------------------------- /example-docs/fake-html.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-html.html -------------------------------------------------------------------------------- /example-docs/fake-incomplete-json.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-incomplete-json.txt -------------------------------------------------------------------------------- /example-docs/fake-power-point-malformed.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-power-point-malformed.pptx -------------------------------------------------------------------------------- /example-docs/fake-power-point-many-pages.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-power-point-many-pages.pptx -------------------------------------------------------------------------------- /example-docs/fake-power-point-table.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-power-point-table.pptx -------------------------------------------------------------------------------- /example-docs/fake-power-point.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-power-point.ppt -------------------------------------------------------------------------------- /example-docs/fake-power-point.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-power-point.pptx -------------------------------------------------------------------------------- /example-docs/fake-text-utf-16-be.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-text-utf-16-be.txt -------------------------------------------------------------------------------- /example-docs/fake-text-utf-16-le.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-text-utf-16-le.txt -------------------------------------------------------------------------------- /example-docs/fake-text-utf-16.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-text-utf-16.txt -------------------------------------------------------------------------------- /example-docs/fake-text-utf-32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-text-utf-32.txt -------------------------------------------------------------------------------- /example-docs/fake-text.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake-text.txt -------------------------------------------------------------------------------- /example-docs/fake.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake.doc -------------------------------------------------------------------------------- /example-docs/fake.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake.docx -------------------------------------------------------------------------------- /example-docs/fake.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake.go -------------------------------------------------------------------------------- /example-docs/fake.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake.odt -------------------------------------------------------------------------------- /example-docs/fake_table.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/fake_table.docx -------------------------------------------------------------------------------- /example-docs/group-shapes-nested.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/group-shapes-nested.pptx -------------------------------------------------------------------------------- /example-docs/handbook-1p-no-rendered-page-breaks.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/handbook-1p-no-rendered-page-breaks.docx -------------------------------------------------------------------------------- /example-docs/handbook-1p.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/handbook-1p.docx -------------------------------------------------------------------------------- /example-docs/handbook-872p.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/handbook-872p.docx -------------------------------------------------------------------------------- /example-docs/hebrew-text-base64-iso88598i.txt: -------------------------------------------------------------------------------- 1 | 8uPr5e8gVGVzbGEgLSDw4/j5+iDn+unu5CDy7CDk4ff55CDs+On55e0g -------------------------------------------------------------------------------- /example-docs/hlink-meta.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/hlink-meta.docx -------------------------------------------------------------------------------- /example-docs/ideas-page.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/ideas-page.html -------------------------------------------------------------------------------- /example-docs/img/DA-1p.heic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/DA-1p.heic -------------------------------------------------------------------------------- /example-docs/img/DA-1p.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/DA-1p.jpg -------------------------------------------------------------------------------- /example-docs/img/DA-1p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/DA-1p.png -------------------------------------------------------------------------------- /example-docs/img/bmp_24.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/bmp_24.bmp -------------------------------------------------------------------------------- /example-docs/img/chi_sim_image.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/chi_sim_image.jpeg -------------------------------------------------------------------------------- /example-docs/img/double-column-A.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/double-column-A.jpg -------------------------------------------------------------------------------- /example-docs/img/double-column-B.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/double-column-B.jpg -------------------------------------------------------------------------------- /example-docs/img/embedded-images-tables.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/embedded-images-tables.jpg -------------------------------------------------------------------------------- /example-docs/img/english-and-korean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/english-and-korean.png -------------------------------------------------------------------------------- /example-docs/img/example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/example.jpg -------------------------------------------------------------------------------- /example-docs/img/jpn-vert.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/jpn-vert.jpeg -------------------------------------------------------------------------------- /example-docs/img/layout-parser-paper-10p.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/layout-parser-paper-10p.jpg -------------------------------------------------------------------------------- /example-docs/img/layout-parser-paper-combined.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/layout-parser-paper-combined.tiff -------------------------------------------------------------------------------- /example-docs/img/layout-parser-paper-fast.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/layout-parser-paper-fast.jpg -------------------------------------------------------------------------------- /example-docs/img/layout-parser-paper-fast.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/layout-parser-paper-fast.tiff -------------------------------------------------------------------------------- /example-docs/img/layout-parser-paper-with-table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/layout-parser-paper-with-table.jpg -------------------------------------------------------------------------------- /example-docs/img/table-multi-row-column-cells.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/img/table-multi-row-column-cells.png -------------------------------------------------------------------------------- /example-docs/language-docs/UDHR_first_article_all.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/UDHR_first_article_all.txt -------------------------------------------------------------------------------- /example-docs/language-docs/eng_afr_spa.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_afr_spa.txt -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa.txt -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa.xlsx -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.doc -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.docx -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.eml -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.epub -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.html -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.md -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.odt -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.org: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.org -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.ppt -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.pptx -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.rst -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.rtf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.rtf -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.txt -------------------------------------------------------------------------------- /example-docs/language-docs/eng_spa_mult.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/language-docs/eng_spa_mult.xml -------------------------------------------------------------------------------- /example-docs/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/logger.py -------------------------------------------------------------------------------- /example-docs/more-than-1k-cells.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/more-than-1k-cells.xlsx -------------------------------------------------------------------------------- /example-docs/norwich-city.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/norwich-city.txt -------------------------------------------------------------------------------- /example-docs/page-breaks.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/page-breaks.docx -------------------------------------------------------------------------------- /example-docs/pdf/DA-1p.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/DA-1p.pdf -------------------------------------------------------------------------------- /example-docs/pdf/DA-619p.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/DA-619p.pdf -------------------------------------------------------------------------------- /example-docs/pdf/a1977-backus-p21.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/a1977-backus-p21.pdf -------------------------------------------------------------------------------- /example-docs/pdf/all-number-table.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/all-number-table.pdf -------------------------------------------------------------------------------- /example-docs/pdf/chevron-page.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/chevron-page.pdf -------------------------------------------------------------------------------- /example-docs/pdf/copy-protected.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/copy-protected.pdf -------------------------------------------------------------------------------- /example-docs/pdf/embedded-images-tables.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/embedded-images-tables.pdf -------------------------------------------------------------------------------- /example-docs/pdf/embedded-images.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/embedded-images.pdf -------------------------------------------------------------------------------- /example-docs/pdf/embedded-link.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/embedded-link.pdf -------------------------------------------------------------------------------- /example-docs/pdf/emphasis-text.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/emphasis-text.pdf -------------------------------------------------------------------------------- /example-docs/pdf/failure-after-repair.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/failure-after-repair.pdf -------------------------------------------------------------------------------- /example-docs/pdf/fake-memo-with-duplicate-page.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/fake-memo-with-duplicate-page.pdf -------------------------------------------------------------------------------- /example-docs/pdf/fake-memo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/fake-memo.pdf -------------------------------------------------------------------------------- /example-docs/pdf/header-test-doc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/header-test-doc.pdf -------------------------------------------------------------------------------- /example-docs/pdf/interface-config-guide-p93.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/interface-config-guide-p93.pdf -------------------------------------------------------------------------------- /example-docs/pdf/invalid-pdf-structure-pdfminer-entire-doc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/invalid-pdf-structure-pdfminer-entire-doc.pdf -------------------------------------------------------------------------------- /example-docs/pdf/invalid-pdf-structure-pdfminer-one-page.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/invalid-pdf-structure-pdfminer-one-page.pdf -------------------------------------------------------------------------------- /example-docs/pdf/korean-text-with-tables.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/korean-text-with-tables.pdf -------------------------------------------------------------------------------- /example-docs/pdf/layout-parser-paper-fast.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/layout-parser-paper-fast.pdf -------------------------------------------------------------------------------- /example-docs/pdf/layout-parser-paper-with-empty-pages.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/layout-parser-paper-with-empty-pages.pdf -------------------------------------------------------------------------------- /example-docs/pdf/layout-parser-paper-with-table.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/layout-parser-paper-with-table.pdf -------------------------------------------------------------------------------- /example-docs/pdf/layout-parser-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/layout-parser-paper.pdf -------------------------------------------------------------------------------- /example-docs/pdf/list-item-example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/list-item-example.pdf -------------------------------------------------------------------------------- /example-docs/pdf/loremipsum-flat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/loremipsum-flat.pdf -------------------------------------------------------------------------------- /example-docs/pdf/multi-column-2p.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/multi-column-2p.pdf -------------------------------------------------------------------------------- /example-docs/pdf/multi-column.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/multi-column.pdf -------------------------------------------------------------------------------- /example-docs/pdf/negative-coords.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/negative-coords.pdf -------------------------------------------------------------------------------- /example-docs/pdf/pdf-bad-color-space.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/pdf-bad-color-space.pdf -------------------------------------------------------------------------------- /example-docs/pdf/pdf2image-memory-error-test-400p.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/pdf2image-memory-error-test-400p.pdf -------------------------------------------------------------------------------- /example-docs/pdf/reliance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/reliance.pdf -------------------------------------------------------------------------------- /example-docs/pdf/table-multi-row-column-cells.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/pdf/table-multi-row-column-cells.pdf -------------------------------------------------------------------------------- /example-docs/picture.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/picture.pptx -------------------------------------------------------------------------------- /example-docs/sample-presentation.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/sample-presentation.pptx -------------------------------------------------------------------------------- /example-docs/science-exploration-1p.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/science-exploration-1p.pptx -------------------------------------------------------------------------------- /example-docs/science-exploration-369p.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/science-exploration-369p.pptx -------------------------------------------------------------------------------- /example-docs/simple-table.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/simple-table.md -------------------------------------------------------------------------------- /example-docs/simple.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/simple.doc -------------------------------------------------------------------------------- /example-docs/simple.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/simple.docx -------------------------------------------------------------------------------- /example-docs/simple.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/simple.json -------------------------------------------------------------------------------- /example-docs/simple.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/simple.odt -------------------------------------------------------------------------------- /example-docs/simple.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/simple.yaml -------------------------------------------------------------------------------- /example-docs/simple.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/simple.zip -------------------------------------------------------------------------------- /example-docs/spring-weather.html.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/spring-weather.html.json -------------------------------------------------------------------------------- /example-docs/stanley-cups-with-emoji.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/stanley-cups-with-emoji.csv -------------------------------------------------------------------------------- /example-docs/stanley-cups-with-emoji.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/stanley-cups-with-emoji.tsv -------------------------------------------------------------------------------- /example-docs/stanley-cups.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/stanley-cups.csv -------------------------------------------------------------------------------- /example-docs/stanley-cups.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/stanley-cups.tsv -------------------------------------------------------------------------------- /example-docs/stanley-cups.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/stanley-cups.xlsx -------------------------------------------------------------------------------- /example-docs/table-multi-row-column-cells-actual.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/table-multi-row-column-cells-actual.csv -------------------------------------------------------------------------------- /example-docs/table-semicolon-delimiter.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/table-semicolon-delimiter.csv -------------------------------------------------------------------------------- /example-docs/tables-with-incomplete-rows.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/tables-with-incomplete-rows.docx -------------------------------------------------------------------------------- /example-docs/teams_chat.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/teams_chat.docx -------------------------------------------------------------------------------- /example-docs/test_evaluate_files/filter_list.txt: -------------------------------------------------------------------------------- 1 | Bank Good Credit Loan.pptx 2 | Performance-Audit-Discussion.pdf 3 | -------------------------------------------------------------------------------- /example-docs/test_evaluate_files/unstructured_output/form.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/test_evaluate_files/unstructured_output/form.json -------------------------------------------------------------------------------- /example-docs/tests-example.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/tests-example.xls -------------------------------------------------------------------------------- /example-docs/unsupported/factbook.xsl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/unsupported/factbook.xsl -------------------------------------------------------------------------------- /example-docs/vodafone.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/vodafone.xlsx -------------------------------------------------------------------------------- /example-docs/winter-sports.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/winter-sports.epub -------------------------------------------------------------------------------- /example-docs/xlsx-subtable-cases.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/example-docs/xlsx-subtable-cases.xlsx -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/airtable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/airtable.py -------------------------------------------------------------------------------- /examples/azure_cognitive_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/azure_cognitive_search.py -------------------------------------------------------------------------------- /examples/chroma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/chroma.py -------------------------------------------------------------------------------- /examples/couchbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/couchbase.py -------------------------------------------------------------------------------- /examples/databricks_volumes_dest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/databricks_volumes_dest.py -------------------------------------------------------------------------------- /examples/databricks_volumes_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/databricks_volumes_source.py -------------------------------------------------------------------------------- /examples/delta_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/delta_table.py -------------------------------------------------------------------------------- /examples/discord_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/discord_example.py -------------------------------------------------------------------------------- /examples/elasticsearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/elasticsearch.py -------------------------------------------------------------------------------- /examples/google_drive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/google_drive.py -------------------------------------------------------------------------------- /examples/kdbai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/kdbai.py -------------------------------------------------------------------------------- /examples/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/local.py -------------------------------------------------------------------------------- /examples/milvus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/milvus.py -------------------------------------------------------------------------------- /examples/mongodb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/mongodb.py -------------------------------------------------------------------------------- /examples/notebooks/databricks_delta_tables.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/notebooks/databricks_delta_tables.ipynb -------------------------------------------------------------------------------- /examples/opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/opensearch.py -------------------------------------------------------------------------------- /examples/pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/pinecone.py -------------------------------------------------------------------------------- /examples/s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/s3.py -------------------------------------------------------------------------------- /examples/salesforce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/salesforce.py -------------------------------------------------------------------------------- /examples/sharepoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/sharepoint.py -------------------------------------------------------------------------------- /examples/singlestore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/singlestore.py -------------------------------------------------------------------------------- /examples/sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/sql.py -------------------------------------------------------------------------------- /examples/vectara.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/vectara.py -------------------------------------------------------------------------------- /examples/weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/examples/weaviate.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/base.txt -------------------------------------------------------------------------------- /requirements/connectors/airtable.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | pyairtable 3 | -------------------------------------------------------------------------------- /requirements/connectors/astradb.txt: -------------------------------------------------------------------------------- 1 | astrapy>2.0.0 2 | -------------------------------------------------------------------------------- /requirements/connectors/azure-ai-search.txt: -------------------------------------------------------------------------------- 1 | azure-search-documents 2 | -------------------------------------------------------------------------------- /requirements/connectors/azure.txt: -------------------------------------------------------------------------------- 1 | adlfs 2 | fsspec 3 | -------------------------------------------------------------------------------- /requirements/connectors/biomed.txt: -------------------------------------------------------------------------------- 1 | bs4 2 | requests 3 | 4 | -------------------------------------------------------------------------------- /requirements/connectors/box.txt: -------------------------------------------------------------------------------- 1 | boxfs 2 | fsspec 3 | -------------------------------------------------------------------------------- /requirements/connectors/chroma.txt: -------------------------------------------------------------------------------- 1 | chromadb 2 | -------------------------------------------------------------------------------- /requirements/connectors/clarifai.txt: -------------------------------------------------------------------------------- 1 | clarifai 2 | -------------------------------------------------------------------------------- /requirements/connectors/confluence.txt: -------------------------------------------------------------------------------- 1 | atlassian-python-api 2 | requests 3 | -------------------------------------------------------------------------------- /requirements/connectors/couchbase.txt: -------------------------------------------------------------------------------- 1 | couchbase 2 | -------------------------------------------------------------------------------- /requirements/connectors/databricks-delta-tables.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | databricks-sql-connector 3 | -------------------------------------------------------------------------------- /requirements/connectors/databricks-volumes.txt: -------------------------------------------------------------------------------- 1 | databricks-sdk>=0.70.0 2 | -------------------------------------------------------------------------------- /requirements/connectors/delta-table.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | deltalake 3 | boto3 4 | tenacity 5 | pyarrow 6 | -------------------------------------------------------------------------------- /requirements/connectors/discord.txt: -------------------------------------------------------------------------------- 1 | discord.py 2 | -------------------------------------------------------------------------------- /requirements/connectors/dropbox.txt: -------------------------------------------------------------------------------- 1 | dropboxdrivefs 2 | fsspec 3 | -------------------------------------------------------------------------------- /requirements/connectors/duckdb.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | duckdb 3 | -------------------------------------------------------------------------------- /requirements/connectors/elasticsearch.txt: -------------------------------------------------------------------------------- 1 | elasticsearch[async]<9.0.0 2 | -------------------------------------------------------------------------------- /requirements/connectors/gcs.txt: -------------------------------------------------------------------------------- 1 | gcsfs 2 | fsspec 3 | bs4 4 | -------------------------------------------------------------------------------- /requirements/connectors/github.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/connectors/github.txt -------------------------------------------------------------------------------- /requirements/connectors/gitlab.txt: -------------------------------------------------------------------------------- 1 | python-gitlab 2 | -------------------------------------------------------------------------------- /requirements/connectors/google-drive.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client 2 | tenacity -------------------------------------------------------------------------------- /requirements/connectors/hubspot.txt: -------------------------------------------------------------------------------- 1 | hubspot-api-client 2 | urllib3 3 | -------------------------------------------------------------------------------- /requirements/connectors/ibm-watsonx-s3.txt: -------------------------------------------------------------------------------- 1 | pyiceberg 2 | httpx 3 | pyarrow 4 | tenacity 5 | pandas 6 | -------------------------------------------------------------------------------- /requirements/connectors/jira.txt: -------------------------------------------------------------------------------- 1 | atlassian-python-api 2 | -------------------------------------------------------------------------------- /requirements/connectors/kafka.txt: -------------------------------------------------------------------------------- 1 | confluent-kafka 2 | -------------------------------------------------------------------------------- /requirements/connectors/kdbai.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | kdbai-client>=1.4.0 3 | -------------------------------------------------------------------------------- /requirements/connectors/lancedb.txt: -------------------------------------------------------------------------------- 1 | lancedb 2 | -------------------------------------------------------------------------------- /requirements/connectors/milvus.txt: -------------------------------------------------------------------------------- 1 | pymilvus 2 | -------------------------------------------------------------------------------- /requirements/connectors/mongodb.txt: -------------------------------------------------------------------------------- 1 | pymongo 2 | -------------------------------------------------------------------------------- /requirements/connectors/neo4j.txt: -------------------------------------------------------------------------------- 1 | neo4j-rust-ext 2 | cymple 3 | networkx -------------------------------------------------------------------------------- /requirements/connectors/notion.txt: -------------------------------------------------------------------------------- 1 | notion-client 2 | htmlBuilder 3 | httpx 4 | backoff 5 | -------------------------------------------------------------------------------- /requirements/connectors/onedrive.txt: -------------------------------------------------------------------------------- 1 | msal 2 | Office365-REST-Python-Client 3 | requests 4 | -------------------------------------------------------------------------------- /requirements/connectors/opensearch.txt: -------------------------------------------------------------------------------- 1 | opensearch-py 2 | -------------------------------------------------------------------------------- /requirements/connectors/outlook.txt: -------------------------------------------------------------------------------- 1 | msal 2 | Office365-REST-Python-Client 3 | -------------------------------------------------------------------------------- /requirements/connectors/pinecone.txt: -------------------------------------------------------------------------------- 1 | pinecone 2 | -------------------------------------------------------------------------------- /requirements/connectors/postgres.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | psycopg2-binary 3 | -------------------------------------------------------------------------------- /requirements/connectors/qdrant.txt: -------------------------------------------------------------------------------- 1 | qdrant-client 2 | -------------------------------------------------------------------------------- /requirements/connectors/reddit.txt: -------------------------------------------------------------------------------- 1 | praw 2 | -------------------------------------------------------------------------------- /requirements/connectors/redis.txt: -------------------------------------------------------------------------------- 1 | redis<=5.3.0 2 | -------------------------------------------------------------------------------- /requirements/connectors/s3.txt: -------------------------------------------------------------------------------- 1 | s3fs 2 | fsspec 3 | -------------------------------------------------------------------------------- /requirements/connectors/salesforce.txt: -------------------------------------------------------------------------------- 1 | simple-salesforce 2 | -------------------------------------------------------------------------------- /requirements/connectors/sftp.txt: -------------------------------------------------------------------------------- 1 | fsspec 2 | paramiko 3 | -------------------------------------------------------------------------------- /requirements/connectors/sharepoint.txt: -------------------------------------------------------------------------------- 1 | msal 2 | Office365-REST-Python-Client 3 | requests 4 | -------------------------------------------------------------------------------- /requirements/connectors/singlestore.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | singlestoredb 3 | -------------------------------------------------------------------------------- /requirements/connectors/slack.txt: -------------------------------------------------------------------------------- 1 | slack_sdk[optional] 2 | -------------------------------------------------------------------------------- /requirements/connectors/snowflake.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | snowflake-connector-python 3 | psycopg2-binary 4 | -------------------------------------------------------------------------------- /requirements/connectors/vastdb.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | pyarrow 3 | vastdb 4 | ibis 5 | -------------------------------------------------------------------------------- /requirements/connectors/vectara.txt: -------------------------------------------------------------------------------- 1 | requests 2 | aiofiles 3 | httpx 4 | -------------------------------------------------------------------------------- /requirements/connectors/weaviate.txt: -------------------------------------------------------------------------------- 1 | weaviate-client 2 | -------------------------------------------------------------------------------- /requirements/connectors/wikipedia.txt: -------------------------------------------------------------------------------- 1 | wikipedia 2 | -------------------------------------------------------------------------------- /requirements/connectors/zendesk.txt: -------------------------------------------------------------------------------- 1 | httpx 2 | aiofiles 3 | bs4 4 | -------------------------------------------------------------------------------- /requirements/embed/bedrock.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/embed/bedrock.txt -------------------------------------------------------------------------------- /requirements/embed/huggingface.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/embed/huggingface.txt -------------------------------------------------------------------------------- /requirements/embed/mixedbreadai.txt: -------------------------------------------------------------------------------- 1 | -c ../common/constraints.txt 2 | 3 | mixedbread 4 | -------------------------------------------------------------------------------- /requirements/embed/octoai.txt: -------------------------------------------------------------------------------- 1 | -c ../common/constraints.txt 2 | 3 | openai 4 | tiktoken 5 | -------------------------------------------------------------------------------- /requirements/embed/openai.txt: -------------------------------------------------------------------------------- 1 | -c ../common/constraints.txt 2 | 3 | openai 4 | tiktoken 5 | -------------------------------------------------------------------------------- /requirements/embed/togetherai.txt: -------------------------------------------------------------------------------- 1 | -c ../common/constraints.txt 2 | 3 | together 4 | -------------------------------------------------------------------------------- /requirements/embed/vertexai.txt: -------------------------------------------------------------------------------- 1 | -c ../common/constraints.txt 2 | 3 | vertexai 4 | -------------------------------------------------------------------------------- /requirements/embed/voyageai.txt: -------------------------------------------------------------------------------- 1 | -c ../common/constraints.txt 2 | 3 | voyageai 4 | -------------------------------------------------------------------------------- /requirements/local_partition/doc.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/doc.txt -------------------------------------------------------------------------------- /requirements/local_partition/docx.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/docx.txt -------------------------------------------------------------------------------- /requirements/local_partition/epub.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/epub.txt -------------------------------------------------------------------------------- /requirements/local_partition/image.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/image.txt -------------------------------------------------------------------------------- /requirements/local_partition/md.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/md.txt -------------------------------------------------------------------------------- /requirements/local_partition/msg.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/msg.txt -------------------------------------------------------------------------------- /requirements/local_partition/odt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/odt.txt -------------------------------------------------------------------------------- /requirements/local_partition/org.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/org.txt -------------------------------------------------------------------------------- /requirements/local_partition/pdf.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/pdf.txt -------------------------------------------------------------------------------- /requirements/local_partition/ppt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/ppt.txt -------------------------------------------------------------------------------- /requirements/local_partition/pptx.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/pptx.txt -------------------------------------------------------------------------------- /requirements/local_partition/rst.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/rst.txt -------------------------------------------------------------------------------- /requirements/local_partition/rtf.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/rtf.txt -------------------------------------------------------------------------------- /requirements/local_partition/tsv.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/tsv.txt -------------------------------------------------------------------------------- /requirements/local_partition/xlsx.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/requirements/local_partition/xlsx.txt -------------------------------------------------------------------------------- /requirements/remote/client.txt: -------------------------------------------------------------------------------- 1 | unstructured-client >= 0.30.0 2 | -------------------------------------------------------------------------------- /scripts/check_untagged_tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/scripts/check_untagged_tests.sh -------------------------------------------------------------------------------- /scripts/install-docker-compose.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/scripts/install-docker-compose.sh -------------------------------------------------------------------------------- /scripts/parse_pytest_report.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/scripts/parse_pytest_report.py -------------------------------------------------------------------------------- /scripts/pip-compile.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/scripts/pip-compile.sh -------------------------------------------------------------------------------- /scripts/version-sync.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/scripts/version-sync.sh -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/chunkers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/chunkers/assets/README.md.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/README.md.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/README.rst.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/README.rst.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/alert.eml.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/alert.eml.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/announcement.eml.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/announcement.eml.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/embedded-images-tables.jpg.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/embedded-images-tables.jpg.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/embedded-images-tables.pdf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/embedded-images-tables.pdf.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/english-and-korean.png.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/english-and-korean.png.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-doc.rtf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-doc.rtf.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-email-attachment.eml.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-email-attachment.eml.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-email.eml.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-email.eml.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-email.msg.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-email.msg.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-html.html.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-html.html.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-power-point.ppt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-power-point.ppt.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-power-point.pptx.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-power-point.pptx.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-text-utf-32.txt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-text-utf-32.txt.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-text.txt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-text.txt.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake-xml.xml.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake-xml.xml.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake.doc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake.doc.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake.docx.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake.docx.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/fake.odt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/fake.odt.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/family-day.eml.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/family-day.eml.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/layout-parser-paper-fast.jpg.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/layout-parser-paper-fast.jpg.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/layout-parser-paper-fast.pdf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/layout-parser-paper-fast.pdf.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/layout-parser-paper.pdf.gz.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/layout-parser-paper.pdf.gz.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/layout-parser-paper.pdf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/layout-parser-paper.pdf.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/list-item-example.pdf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/list-item-example.pdf.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/multi_page_image.tif.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/multi_page_image.tif.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/spring-weather.html.json.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/spring-weather.html.json.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/stanley-cups.csv.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/stanley-cups.csv.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/stanley-cups.tsv.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/stanley-cups.tsv.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/stanley-cups.xlsx.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/stanley-cups.xlsx.json -------------------------------------------------------------------------------- /test/integration/chunkers/assets/winter-sports.epub.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/assets/winter-sports.epub.json -------------------------------------------------------------------------------- /test/integration/chunkers/test_chunkers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/chunkers/test_chunkers.py -------------------------------------------------------------------------------- /test/integration/connectors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/conftest.py -------------------------------------------------------------------------------- /test/integration/connectors/databricks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/databricks/test_volumes_native.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/databricks/test_volumes_native.py -------------------------------------------------------------------------------- /test/integration/connectors/discord/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/discord/test_discord.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/discord/test_discord.py -------------------------------------------------------------------------------- /test/integration/connectors/duckdb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/duckdb/assets/duckdb-schema.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/duckdb/assets/duckdb-schema.sql -------------------------------------------------------------------------------- /test/integration/connectors/duckdb/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/duckdb/conftest.py -------------------------------------------------------------------------------- /test/integration/connectors/duckdb/test_duckdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/duckdb/test_duckdb.py -------------------------------------------------------------------------------- /test/integration/connectors/duckdb/test_motherduck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/duckdb/test_motherduck.py -------------------------------------------------------------------------------- /test/integration/connectors/elasticsearch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/elasticsearch/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/elasticsearch/conftest.py -------------------------------------------------------------------------------- /test/integration/connectors/elasticsearch/test_elasticsearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/elasticsearch/test_elasticsearch.py -------------------------------------------------------------------------------- /test/integration/connectors/elasticsearch/test_opensearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/elasticsearch/test_opensearch.py -------------------------------------------------------------------------------- /test/integration/connectors/env_setup/kafka/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/env_setup/kafka/docker-compose.yaml -------------------------------------------------------------------------------- /test/integration/connectors/env_setup/milvus/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/env_setup/milvus/docker-compose.yml -------------------------------------------------------------------------------- /test/integration/connectors/env_setup/minio/source/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/env_setup/minio/source/.env -------------------------------------------------------------------------------- /test/integration/connectors/env_setup/minio/source/.gitignore: -------------------------------------------------------------------------------- 1 | !.env 2 | -------------------------------------------------------------------------------- /test/integration/connectors/env_setup/minio/source/seed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/env_setup/minio/source/seed.sh -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/.gitignore: -------------------------------------------------------------------------------- 1 | !downloads 2 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-cloud/downloads/fake-topic_0_0.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 0 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-cloud/downloads/fake-topic_0_1.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 1 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-cloud/downloads/fake-topic_0_2.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 2 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-cloud/downloads/fake-topic_0_3.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 3 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-cloud/downloads/fake-topic_0_4.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 4 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-local/downloads/fake-topic_0_0.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 0 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-local/downloads/fake-topic_0_1.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 1 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-local/downloads/fake-topic_0_2.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 2 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-local/downloads/fake-topic_0_3.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 3 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/kafka-local/downloads/fake-topic_0_4.txt: -------------------------------------------------------------------------------- 1 | This is some text for message 4 -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-1-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 1,brand_0 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-10-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 10,brand_9 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-11-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 11,brand_10 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-12-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 12,brand_11 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-13-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 13,brand_12 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-14-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 14,brand_13 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-15-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 15,brand_14 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-16-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 16,brand_15 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-17-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 17,brand_16 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-18-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 18,brand_17 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-19-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 19,brand_18 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-2-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 2,brand_1 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-20-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 20,brand_19 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-3-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 3,brand_2 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-4-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 4,brand_3 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-5-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 5,brand_4 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-6-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 6,brand_5 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-7-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 7,brand_6 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-8-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 8,brand_7 3 | -------------------------------------------------------------------------------- /test/integration/connectors/expected_results/snowflake/downloads/cars-9-a2ccd8ea.csv: -------------------------------------------------------------------------------- 1 | CAR_ID,BRAND 2 | 9,brand_8 3 | -------------------------------------------------------------------------------- /test/integration/connectors/kafka/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/kafka/conftest.py -------------------------------------------------------------------------------- /test/integration/connectors/kafka/test_kafka_cloud.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/kafka/test_kafka_cloud.py -------------------------------------------------------------------------------- /test/integration/connectors/kafka/test_kafka_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/kafka/test_kafka_local.py -------------------------------------------------------------------------------- /test/integration/connectors/sql/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/sql/test_databricks_delta_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/sql/test_databricks_delta_tables.py -------------------------------------------------------------------------------- /test/integration/connectors/sql/test_postgres.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/sql/test_postgres.py -------------------------------------------------------------------------------- /test/integration/connectors/sql/test_singlestore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/sql/test_singlestore.py -------------------------------------------------------------------------------- /test/integration/connectors/sql/test_snowflake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/sql/test_snowflake.py -------------------------------------------------------------------------------- /test/integration/connectors/sql/test_sqlite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/sql/test_sqlite.py -------------------------------------------------------------------------------- /test/integration/connectors/sql/test_vastdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/sql/test_vastdb.py -------------------------------------------------------------------------------- /test/integration/connectors/test_astradb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_astradb.py -------------------------------------------------------------------------------- /test/integration/connectors/test_azure_ai_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_azure_ai_search.py -------------------------------------------------------------------------------- /test/integration/connectors/test_chroma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_chroma.py -------------------------------------------------------------------------------- /test/integration/connectors/test_confluence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_confluence.py -------------------------------------------------------------------------------- /test/integration/connectors/test_delta_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_delta_table.py -------------------------------------------------------------------------------- /test/integration/connectors/test_dropbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_dropbox.py -------------------------------------------------------------------------------- /test/integration/connectors/test_github.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_github.py -------------------------------------------------------------------------------- /test/integration/connectors/test_google_drive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_google_drive.py -------------------------------------------------------------------------------- /test/integration/connectors/test_jira.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_jira.py -------------------------------------------------------------------------------- /test/integration/connectors/test_lancedb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_lancedb.py -------------------------------------------------------------------------------- /test/integration/connectors/test_milvus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_milvus.py -------------------------------------------------------------------------------- /test/integration/connectors/test_mongodb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_mongodb.py -------------------------------------------------------------------------------- /test/integration/connectors/test_neo4j.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_neo4j.py -------------------------------------------------------------------------------- /test/integration/connectors/test_notion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_notion.py -------------------------------------------------------------------------------- /test/integration/connectors/test_onedrive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_onedrive.py -------------------------------------------------------------------------------- /test/integration/connectors/test_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_pinecone.py -------------------------------------------------------------------------------- /test/integration/connectors/test_qdrant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_qdrant.py -------------------------------------------------------------------------------- /test/integration/connectors/test_redis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_redis.py -------------------------------------------------------------------------------- /test/integration/connectors/test_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_s3.py -------------------------------------------------------------------------------- /test/integration/connectors/test_sharepoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_sharepoint.py -------------------------------------------------------------------------------- /test/integration/connectors/test_vectara.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_vectara.py -------------------------------------------------------------------------------- /test/integration/connectors/test_zendesk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/test_zendesk.py -------------------------------------------------------------------------------- /test/integration/connectors/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/utils/constants.py -------------------------------------------------------------------------------- /test/integration/connectors/utils/docker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/utils/docker.py -------------------------------------------------------------------------------- /test/integration/connectors/utils/docker_compose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/utils/docker_compose.py -------------------------------------------------------------------------------- /test/integration/connectors/utils/validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/utils/validation/destination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/utils/validation/destination.py -------------------------------------------------------------------------------- /test/integration/connectors/utils/validation/equality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/utils/validation/equality.py -------------------------------------------------------------------------------- /test/integration/connectors/utils/validation/source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/utils/validation/source.py -------------------------------------------------------------------------------- /test/integration/connectors/utils/validation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/utils/validation/utils.py -------------------------------------------------------------------------------- /test/integration/connectors/weaviate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/connectors/weaviate/assets/elements.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/weaviate/assets/elements.json -------------------------------------------------------------------------------- /test/integration/connectors/weaviate/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/weaviate/conftest.py -------------------------------------------------------------------------------- /test/integration/connectors/weaviate/test_cloud.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/weaviate/test_cloud.py -------------------------------------------------------------------------------- /test/integration/connectors/weaviate/test_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/connectors/weaviate/test_local.py -------------------------------------------------------------------------------- /test/integration/embedders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/embedders/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/conftest.py -------------------------------------------------------------------------------- /test/integration/embedders/test_azure_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_azure_openai.py -------------------------------------------------------------------------------- /test/integration/embedders/test_bedrock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_bedrock.py -------------------------------------------------------------------------------- /test/integration/embedders/test_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_huggingface.py -------------------------------------------------------------------------------- /test/integration/embedders/test_mixedbread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_mixedbread.py -------------------------------------------------------------------------------- /test/integration/embedders/test_octoai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_octoai.py -------------------------------------------------------------------------------- /test/integration/embedders/test_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_openai.py -------------------------------------------------------------------------------- /test/integration/embedders/test_togetherai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_togetherai.py -------------------------------------------------------------------------------- /test/integration/embedders/test_vertexai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_vertexai.py -------------------------------------------------------------------------------- /test/integration/embedders/test_voyageai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/test_voyageai.py -------------------------------------------------------------------------------- /test/integration/embedders/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/embedders/utils.py -------------------------------------------------------------------------------- /test/integration/partitioners/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/partitioners/assets/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/README.md -------------------------------------------------------------------------------- /test/integration/partitioners/assets/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/README.rst -------------------------------------------------------------------------------- /test/integration/partitioners/assets/alert.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/alert.eml -------------------------------------------------------------------------------- /test/integration/partitioners/assets/announcement.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/announcement.eml -------------------------------------------------------------------------------- /test/integration/partitioners/assets/embedded-images-tables.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/embedded-images-tables.jpg -------------------------------------------------------------------------------- /test/integration/partitioners/assets/embedded-images-tables.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/embedded-images-tables.pdf -------------------------------------------------------------------------------- /test/integration/partitioners/assets/english-and-korean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/english-and-korean.png -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-doc.rtf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-doc.rtf -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-email-attachment.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-email-attachment.eml -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-email-image-embedded.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-email-image-embedded.eml -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-email.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-email.eml -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-email.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-email.msg -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-html.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-html.html -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-power-point.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-power-point.ppt -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-power-point.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-power-point.pptx -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-text-utf-32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-text-utf-32.txt -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-text.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-text.txt -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake-xml.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake-xml.xml -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake.doc -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake.docx -------------------------------------------------------------------------------- /test/integration/partitioners/assets/fake.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/fake.odt -------------------------------------------------------------------------------- /test/integration/partitioners/assets/family-day.eml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/family-day.eml -------------------------------------------------------------------------------- /test/integration/partitioners/assets/layout-parser-paper-fast.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/layout-parser-paper-fast.jpg -------------------------------------------------------------------------------- /test/integration/partitioners/assets/layout-parser-paper-fast.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/layout-parser-paper-fast.pdf -------------------------------------------------------------------------------- /test/integration/partitioners/assets/layout-parser-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/layout-parser-paper.pdf -------------------------------------------------------------------------------- /test/integration/partitioners/assets/layout-parser-paper.pdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/layout-parser-paper.pdf.gz -------------------------------------------------------------------------------- /test/integration/partitioners/assets/list-item-example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/list-item-example.pdf -------------------------------------------------------------------------------- /test/integration/partitioners/assets/multi_page_image.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/multi_page_image.tif -------------------------------------------------------------------------------- /test/integration/partitioners/assets/spring-weather.html.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/spring-weather.html.json -------------------------------------------------------------------------------- /test/integration/partitioners/assets/stanley-cups.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/stanley-cups.csv -------------------------------------------------------------------------------- /test/integration/partitioners/assets/stanley-cups.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/stanley-cups.tsv -------------------------------------------------------------------------------- /test/integration/partitioners/assets/stanley-cups.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/stanley-cups.xlsx -------------------------------------------------------------------------------- /test/integration/partitioners/assets/winter-sports.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/assets/winter-sports.epub -------------------------------------------------------------------------------- /test/integration/partitioners/test_partitioner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/partitioners/test_partitioner.py -------------------------------------------------------------------------------- /test/integration/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/integration/utils.py -------------------------------------------------------------------------------- /test/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/chunkers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/chunkers/test_chunkers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/chunkers/test_chunkers.py -------------------------------------------------------------------------------- /test/unit/cli/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/cli/test_utils.py -------------------------------------------------------------------------------- /test/unit/connectors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/connectors/ibm_watsonx/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py -------------------------------------------------------------------------------- /test/unit/connectors/motherduck/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/connectors/motherduck/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/motherduck/test_base.py -------------------------------------------------------------------------------- /test/unit/connectors/notion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/connectors/notion/types/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/connectors/notion/types/database_properties/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/connectors/notion/types/test_user.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/notion/types/test_user.py -------------------------------------------------------------------------------- /test/unit/connectors/sql/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/connectors/sql/test_snowflake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/sql/test_snowflake.py -------------------------------------------------------------------------------- /test/unit/connectors/sql/test_sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/sql/test_sql.py -------------------------------------------------------------------------------- /test/unit/connectors/test_confluence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/test_confluence.py -------------------------------------------------------------------------------- /test/unit/connectors/test_json_schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/test_json_schema.py -------------------------------------------------------------------------------- /test/unit/connectors/test_neo4j.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/test_neo4j.py -------------------------------------------------------------------------------- /test/unit/connectors/test_sharepoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/test_sharepoint.py -------------------------------------------------------------------------------- /test/unit/connectors/weaviate/test_weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/connectors/weaviate/test_weaviate.py -------------------------------------------------------------------------------- /test/unit/embed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/embed/test_mixedbreadai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embed/test_mixedbreadai.py -------------------------------------------------------------------------------- /test/unit/embed/test_octoai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embed/test_octoai.py -------------------------------------------------------------------------------- /test/unit/embed/test_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embed/test_openai.py -------------------------------------------------------------------------------- /test/unit/embed/test_vertexai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embed/test_vertexai.py -------------------------------------------------------------------------------- /test/unit/embed/test_voyageai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embed/test_voyageai.py -------------------------------------------------------------------------------- /test/unit/embedders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/embedders/test_bedrock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embedders/test_bedrock.py -------------------------------------------------------------------------------- /test/unit/embedders/test_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embedders/test_huggingface.py -------------------------------------------------------------------------------- /test/unit/embedders/test_mixedbread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embedders/test_mixedbread.py -------------------------------------------------------------------------------- /test/unit/embedders/test_octoai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embedders/test_octoai.py -------------------------------------------------------------------------------- /test/unit/embedders/test_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embedders/test_openai.py -------------------------------------------------------------------------------- /test/unit/embedders/test_togetherai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embedders/test_togetherai.py -------------------------------------------------------------------------------- /test/unit/embedders/test_vertexai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embedders/test_vertexai.py -------------------------------------------------------------------------------- /test/unit/embedders/test_voyageai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/embedders/test_voyageai.py -------------------------------------------------------------------------------- /test/unit/partitioners/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/partitioners/test_partitioner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/partitioners/test_partitioner.py -------------------------------------------------------------------------------- /test/unit/processes/connectors/test_astradb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/processes/connectors/test_astradb.py -------------------------------------------------------------------------------- /test/unit/processes/connectors/test_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/processes/connectors/test_pinecone.py -------------------------------------------------------------------------------- /test/unit/test_error.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/test_error.py -------------------------------------------------------------------------------- /test/unit/test_html.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/test_html.py -------------------------------------------------------------------------------- /test/unit/test_interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/test_interfaces.py -------------------------------------------------------------------------------- /test/unit/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/test_utils.py -------------------------------------------------------------------------------- /test/unit/unstructured/test_chunking_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/unstructured/test_chunking_utils.py -------------------------------------------------------------------------------- /test/unit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/unit/utils/data_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test/unit/utils/data_generator.py -------------------------------------------------------------------------------- /test_e2e/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/.gitignore -------------------------------------------------------------------------------- /test_e2e/check-diff-expected-output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/check-diff-expected-output.py -------------------------------------------------------------------------------- /test_e2e/check-num-dirs-output.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/check-num-dirs-output.sh -------------------------------------------------------------------------------- /test_e2e/check-num-files-expected-output.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/check-num-files-expected-output.sh -------------------------------------------------------------------------------- /test_e2e/check-num-files-output.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/check-num-files-output.sh -------------------------------------------------------------------------------- /test_e2e/clean-permissions-files.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/clean-permissions-files.sh -------------------------------------------------------------------------------- /test_e2e/cleanup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/cleanup.sh -------------------------------------------------------------------------------- /test_e2e/dest/azure.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/dest/azure.sh -------------------------------------------------------------------------------- /test_e2e/dest/box.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/dest/box.sh -------------------------------------------------------------------------------- /test_e2e/dest/couchbase.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/dest/couchbase.sh -------------------------------------------------------------------------------- /test_e2e/dest/gcs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/dest/gcs.sh -------------------------------------------------------------------------------- /test_e2e/dest/kdbai.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/dest/kdbai.sh -------------------------------------------------------------------------------- /test_e2e/dest/s3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/dest/s3.sh -------------------------------------------------------------------------------- /test_e2e/dest/sharepoint-embed-cog-index.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/dest/sharepoint-embed-cog-index.sh -------------------------------------------------------------------------------- /test_e2e/docker-compose.otel.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/docker-compose.otel.yml -------------------------------------------------------------------------------- /test_e2e/env_setup/airtable/check-num-rows-and-columns-output.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/airtable/check-num-rows-and-columns-output.sh -------------------------------------------------------------------------------- /test_e2e/env_setup/airtable/component_ids.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/airtable/component_ids.sh -------------------------------------------------------------------------------- /test_e2e/env_setup/airtable/create_scale_test_components.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/airtable/create_scale_test_components.py -------------------------------------------------------------------------------- /test_e2e/env_setup/airtable/create_scale_test_components.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/airtable/create_scale_test_components.sh -------------------------------------------------------------------------------- /test_e2e/env_setup/airtable/print_num_rows_df.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/airtable/print_num_rows_df.py -------------------------------------------------------------------------------- /test_e2e/env_setup/couchbase/common/check_cluster_health.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/couchbase/common/check_cluster_health.py -------------------------------------------------------------------------------- /test_e2e/env_setup/couchbase/common/constants.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/couchbase/common/constants.env -------------------------------------------------------------------------------- /test_e2e/env_setup/couchbase/common/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/couchbase/common/docker-compose.yaml -------------------------------------------------------------------------------- /test_e2e/env_setup/couchbase/common/setup_couchbase_cluster.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/couchbase/common/setup_couchbase_cluster.sh -------------------------------------------------------------------------------- /test_e2e/env_setup/kafka/create-kafka-instance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/kafka/create-kafka-instance.sh -------------------------------------------------------------------------------- /test_e2e/env_setup/kafka/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/kafka/docker-compose.yml -------------------------------------------------------------------------------- /test_e2e/env_setup/kdbai/.gitignore: -------------------------------------------------------------------------------- 1 | k4.lic 2 | -------------------------------------------------------------------------------- /test_e2e/env_setup/kdbai/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/kdbai/docker-compose.yml -------------------------------------------------------------------------------- /test_e2e/env_setup/kdbai/k4.lic.gpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/kdbai/k4.lic.gpg -------------------------------------------------------------------------------- /test_e2e/env_setup/kdbai/provision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/kdbai/provision.py -------------------------------------------------------------------------------- /test_e2e/env_setup/kdbai/provision.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/kdbai/provision.sh -------------------------------------------------------------------------------- /test_e2e/env_setup/kdbai/test_output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/kdbai/test_output.py -------------------------------------------------------------------------------- /test_e2e/env_setup/sftp/create-and-check-sftp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/sftp/create-and-check-sftp.sh -------------------------------------------------------------------------------- /test_e2e/env_setup/sftp/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/sftp/docker-compose.yaml -------------------------------------------------------------------------------- /test_e2e/env_setup/sftp/folder1/wiki_movie_plots_small.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/env_setup/sftp/folder1/wiki_movie_plots_small.csv -------------------------------------------------------------------------------- /test_e2e/example-docs/layout-parser-paper-with-table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/example-docs/layout-parser-paper-with-table.jpg -------------------------------------------------------------------------------- /test_e2e/example-docs/layout-parser-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/example-docs/layout-parser-paper.pdf -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/Sharepoint-with-permissions/SitePages/Home.aspx.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/Sharepoint/Document.docx.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/Sharepoint/Document.docx.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/Sharepoint/SitePages/Home.aspx.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/Sharepoint/fake-text.txt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/Sharepoint/fake-text.txt.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/azure/IRS-form-1987.pdf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/azure/IRS-form-1987.pdf.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/azure/IRS-form-1987.png.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/azure/IRS-form-1987.png.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/azure/rfc854.txt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/azure/rfc854.txt.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/azure/spring-weather.html.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/azure/spring-weather.html.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/box/handbook-1p.docx.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/box/handbook-1p.docx.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/gcs/ideas-page.html.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/gcs/ideas-page.html.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/github/LICENSE.txt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/github/LICENSE.txt.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/github/test.html.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/github/test.html.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/gitlab/docs/_index.md.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/gitlab/docs/_index.md.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/google-drive/fake.docx.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/google-drive/fake.docx.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/google-drive/foo.txt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/expected-structured-output/google-drive/foo.txt.json -------------------------------------------------------------------------------- /test_e2e/expected-structured-output/pdf-fast-reprocess/azure/IRS-form-1987.pdf.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /test_e2e/failed-partition-docs/sample.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/failed-partition-docs/sample.gif -------------------------------------------------------------------------------- /test_e2e/failed-partition-docs/small.txt: -------------------------------------------------------------------------------- 1 | This is some test to partition 2 | -------------------------------------------------------------------------------- /test_e2e/files/azure_ai_index_schema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/files/azure_ai_index_schema.json -------------------------------------------------------------------------------- /test_e2e/otel_assets/otel-collector-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/otel_assets/otel-collector-config.yaml -------------------------------------------------------------------------------- /test_e2e/python/test-azure-output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/python/test-azure-output.py -------------------------------------------------------------------------------- /test_e2e/python/test-gcs-output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/python/test-gcs-output.py -------------------------------------------------------------------------------- /test_e2e/python/test-ingest-couchbase-output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/python/test-ingest-couchbase-output.py -------------------------------------------------------------------------------- /test_e2e/reference_test_scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/reference_test_scripts/README.md -------------------------------------------------------------------------------- /test_e2e/reference_test_scripts/pinecone.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/reference_test_scripts/pinecone.sh -------------------------------------------------------------------------------- /test_e2e/src/against-api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/against-api.sh -------------------------------------------------------------------------------- /test_e2e/src/airtable-diff.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/airtable-diff.sh -------------------------------------------------------------------------------- /test_e2e/src/airtable-large.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/airtable-large.sh -------------------------------------------------------------------------------- /test_e2e/src/azure.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/azure.sh -------------------------------------------------------------------------------- /test_e2e/src/box.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/box.sh -------------------------------------------------------------------------------- /test_e2e/src/couchbase.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/couchbase.sh -------------------------------------------------------------------------------- /test_e2e/src/gcs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/gcs.sh -------------------------------------------------------------------------------- /test_e2e/src/gitlab.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/gitlab.sh -------------------------------------------------------------------------------- /test_e2e/src/google-drive.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/google-drive.sh -------------------------------------------------------------------------------- /test_e2e/src/outlook.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/outlook.sh -------------------------------------------------------------------------------- /test_e2e/src/pdf-fast-reprocess.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/pdf-fast-reprocess.sh -------------------------------------------------------------------------------- /test_e2e/src/s3-compression.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/s3-compression.sh -------------------------------------------------------------------------------- /test_e2e/src/s3-filter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/s3-filter.sh -------------------------------------------------------------------------------- /test_e2e/src/s3-sample.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/s3-sample.sh -------------------------------------------------------------------------------- /test_e2e/src/s3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/s3.sh -------------------------------------------------------------------------------- /test_e2e/src/salesforce.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/salesforce.sh -------------------------------------------------------------------------------- /test_e2e/src/sftp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/sftp.sh -------------------------------------------------------------------------------- /test_e2e/src/slack.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/src/slack.sh -------------------------------------------------------------------------------- /test_e2e/test-dest.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/test-dest.sh -------------------------------------------------------------------------------- /test_e2e/test-help.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/test-help.py -------------------------------------------------------------------------------- /test_e2e/test-src.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/test_e2e/test-src.sh -------------------------------------------------------------------------------- /unstructured_ingest/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | -------------------------------------------------------------------------------- /unstructured_ingest/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.2.22" # pragma: no cover 2 | -------------------------------------------------------------------------------- /unstructured_ingest/cli/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/README.md -------------------------------------------------------------------------------- /unstructured_ingest/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/cli/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/base/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/cli/base/cmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/base/cmd.py -------------------------------------------------------------------------------- /unstructured_ingest/cli/base/dest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/base/dest.py -------------------------------------------------------------------------------- /unstructured_ingest/cli/base/importer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/base/importer.py -------------------------------------------------------------------------------- /unstructured_ingest/cli/base/src.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/base/src.py -------------------------------------------------------------------------------- /unstructured_ingest/cli/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/cli.py -------------------------------------------------------------------------------- /unstructured_ingest/cli/cmds.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/cmds.py -------------------------------------------------------------------------------- /unstructured_ingest/cli/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/cli/utils/click.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/utils/click.py -------------------------------------------------------------------------------- /unstructured_ingest/cli/utils/model_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/cli/utils/model_conversion.py -------------------------------------------------------------------------------- /unstructured_ingest/data_types/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/data_types/entities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/data_types/entities.py -------------------------------------------------------------------------------- /unstructured_ingest/data_types/file_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/data_types/file_data.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/embed/azure_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/azure_openai.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/bedrock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/bedrock.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/huggingface.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/interfaces.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/mixedbreadai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/mixedbreadai.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/octoai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/octoai.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/openai.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/togetherai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/togetherai.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/vertexai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/vertexai.py -------------------------------------------------------------------------------- /unstructured_ingest/embed/voyageai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/embed/voyageai.py -------------------------------------------------------------------------------- /unstructured_ingest/error.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/error.py -------------------------------------------------------------------------------- /unstructured_ingest/errors_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/errors_v2.py -------------------------------------------------------------------------------- /unstructured_ingest/interfaces/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/interfaces/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/interfaces/connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/interfaces/connector.py -------------------------------------------------------------------------------- /unstructured_ingest/interfaces/downloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/interfaces/downloader.py -------------------------------------------------------------------------------- /unstructured_ingest/interfaces/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/interfaces/indexer.py -------------------------------------------------------------------------------- /unstructured_ingest/interfaces/process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/interfaces/process.py -------------------------------------------------------------------------------- /unstructured_ingest/interfaces/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/interfaces/processor.py -------------------------------------------------------------------------------- /unstructured_ingest/interfaces/upload_stager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/interfaces/upload_stager.py -------------------------------------------------------------------------------- /unstructured_ingest/interfaces/uploader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/interfaces/uploader.py -------------------------------------------------------------------------------- /unstructured_ingest/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/logger.py -------------------------------------------------------------------------------- /unstructured_ingest/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/main.py -------------------------------------------------------------------------------- /unstructured_ingest/otel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/otel.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/interfaces.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/otel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/otel.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/pipeline.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/chunk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/chunk.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/download.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/embed.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/filter.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/index.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/partition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/partition.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/stage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/stage.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/uncompress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/uncompress.py -------------------------------------------------------------------------------- /unstructured_ingest/pipeline/steps/upload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/pipeline/steps/upload.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/chunker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/chunker.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connector_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connector_registry.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/airtable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/airtable.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/assets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/astradb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/astradb.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/azure_ai_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/azure_ai_search.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/chroma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/chroma.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/confluence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/confluence.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/couchbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/couchbase.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/databricks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/databricks/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/databricks/volumes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/databricks/volumes.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/delta_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/delta_table.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/discord.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/discord.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/duckdb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/duckdb/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/duckdb/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/duckdb/base.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/duckdb/duckdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/duckdb/duckdb.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/duckdb/motherduck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/duckdb/motherduck.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/azure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/azure.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/box.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/box.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/dropbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/dropbox.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/fsspec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/fsspec.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/gcs.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/s3.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/sftp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/sftp.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/fsspec/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/fsspec/utils.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/github.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/github.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/gitlab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/gitlab.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/google_drive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/google_drive.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/jira.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/jira.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/kafka/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/kafka/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/kafka/cloud.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/kafka/cloud.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/kafka/kafka.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/kafka/kafka.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/kafka/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/kafka/local.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/kdbai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/kdbai.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/lancedb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/lancedb/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/lancedb/aws.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/lancedb/aws.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/lancedb/azure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/lancedb/azure.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/lancedb/cloud.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/lancedb/cloud.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/lancedb/gcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/lancedb/gcp.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/lancedb/lancedb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/lancedb/lancedb.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/lancedb/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/lancedb/local.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/local.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/milvus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/milvus.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/mongodb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/mongodb.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/neo4j.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/neo4j.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/client.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/connector.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/helpers.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/interfaces.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/types/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/types/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/types/block.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/types/date.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/types/date.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/types/file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/types/file.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/types/page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/types/page.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/types/parent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/types/parent.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/notion/types/user.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/notion/types/user.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/onedrive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/onedrive.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/outlook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/outlook.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/pinecone.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/qdrant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/qdrant/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/qdrant/cloud.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/qdrant/cloud.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/qdrant/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/qdrant/local.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/qdrant/qdrant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/qdrant/qdrant.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/qdrant/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/qdrant/server.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/redisdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/redisdb.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/salesforce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/salesforce.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/sharepoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/sharepoint.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/slack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/slack.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/sql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/sql/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/sql/postgres.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/sql/postgres.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/sql/singlestore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/sql/singlestore.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/sql/snowflake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/sql/snowflake.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/sql/sql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/sql/sql.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/sql/sqlite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/sql/sqlite.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/sql/vastdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/sql/vastdb.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/utils.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/vectara.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/vectara.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/weaviate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/weaviate/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/weaviate/cloud.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/weaviate/cloud.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/weaviate/embedded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/weaviate/embedded.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/weaviate/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/weaviate/local.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/weaviate/weaviate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/weaviate/weaviate.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/zendesk/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/zendesk/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/zendesk/client.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/connectors/zendesk/zendesk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/connectors/zendesk/zendesk.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/embedder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/embedder.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/filter.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/partitioner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/partitioner.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/uncompress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/uncompress.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/utils/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/utils/blob_storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/utils/blob_storage.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/utils/logging/connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/utils/logging/connector.py -------------------------------------------------------------------------------- /unstructured_ingest/processes/utils/logging/sanitizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/processes/utils/logging/sanitizer.py -------------------------------------------------------------------------------- /unstructured_ingest/unstructured_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/unstructured_api.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/__init__.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/chunking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/chunking.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/compression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/compression.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/constants.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/data_prep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/data_prep.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/dep_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/dep_check.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/filesystem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/filesystem.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/html.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/html.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/ndjson.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/ndjson.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/pydantic_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/pydantic_models.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/string_and_date_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/string_and_date_utils.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/table.py -------------------------------------------------------------------------------- /unstructured_ingest/utils/tls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/unstructured_ingest/utils/tls.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/unstructured-ingest/HEAD/uv.lock --------------------------------------------------------------------------------