├── .env.example ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── converted_images └── llama │ ├── sample-3_page_1.jpg │ └── sample-3_page_2.jpg ├── genie_logo.png ├── images ├── genie_logo.png └── ui.png ├── input ├── sample-1.pdf ├── sample-2.pdf ├── sample-3.pdf ├── sample-4.pdf ├── sample-5.pdf └── sample_img.png ├── output └── table-page-1-table-1.csv ├── parser ├── __init__.py ├── amazon-textract │ ├── __init__.py │ └── lc_amazon_textract.py ├── camelot │ ├── __init__.py │ └── camelot_.py ├── claude │ ├── __init__.py │ ├── claude_.py │ └── claude_and_llama.py ├── docling │ ├── __init__.py │ ├── docling_and_llama.py │ └── docling_custom.py ├── gemini │ ├── __init__.py │ └── gemini.py ├── llama-parse │ ├── __init__.py │ └── llama_parse_example.py ├── llama-vision │ ├── __init__.py │ └── llama_vision.py ├── marker │ └── __init__.py ├── markitdown │ ├── __init__.py │ └── markitdown_pdf.py ├── mistral_ocr │ ├── __init__.py │ └── mistral.py ├── openai │ ├── __init__.py │ └── openai_.py ├── pdfminer │ ├── __init__.py │ └── lc_pdfminer.py ├── pdfplumber │ ├── __init__.py │ └── lc_pdfplumber.py ├── pymupdf │ ├── __init__.py │ └── lc_pymupdf.py ├── pypdf │ ├── __init__.py │ └── lc_pypdf.py ├── pypdfdirectory │ ├── __init__.py │ └── lc_pypdfdirectory.py ├── pypdfium │ ├── __init__.py │ └── lc_pypdfium.py └── unstructured-io │ ├── __init__.py │ └── lc_unstructured-io.py ├── pdf-parsing-guide.pdf ├── pdf_parser_app.py ├── requirements.txt ├── utils ├── __init__.py ├── pdf_to_image.py └── utils.py ├── vlm_ocr ├── README.md ├── anthropic │ └── main.py ├── gemini │ └── main.py ├── mistral_ocr │ └── main.py ├── msft_kosmos_2.5 │ └── main.py ├── ollama_models │ └── main.py ├── omniai │ └── main.py ├── openai │ └── main.py └── smol_docling │ └── main.py └── vlm_ocr_app.py /.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/.env.example -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /converted_images/llama/sample-3_page_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/converted_images/llama/sample-3_page_1.jpg -------------------------------------------------------------------------------- /converted_images/llama/sample-3_page_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/converted_images/llama/sample-3_page_2.jpg -------------------------------------------------------------------------------- /genie_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/genie_logo.png -------------------------------------------------------------------------------- /images/genie_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/images/genie_logo.png -------------------------------------------------------------------------------- /images/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/images/ui.png -------------------------------------------------------------------------------- /input/sample-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/input/sample-1.pdf -------------------------------------------------------------------------------- /input/sample-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/input/sample-2.pdf -------------------------------------------------------------------------------- /input/sample-3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/input/sample-3.pdf -------------------------------------------------------------------------------- /input/sample-4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/input/sample-4.pdf -------------------------------------------------------------------------------- /input/sample-5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/input/sample-5.pdf -------------------------------------------------------------------------------- /input/sample_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/input/sample_img.png -------------------------------------------------------------------------------- /output/table-page-1-table-1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/output/table-page-1-table-1.csv -------------------------------------------------------------------------------- /parser/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/amazon-textract/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/amazon-textract/lc_amazon_textract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/amazon-textract/lc_amazon_textract.py -------------------------------------------------------------------------------- /parser/camelot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/camelot/camelot_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/camelot/camelot_.py -------------------------------------------------------------------------------- /parser/claude/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/claude/claude_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/claude/claude_.py -------------------------------------------------------------------------------- /parser/claude/claude_and_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/claude/claude_and_llama.py -------------------------------------------------------------------------------- /parser/docling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/docling/docling_and_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/docling/docling_and_llama.py -------------------------------------------------------------------------------- /parser/docling/docling_custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/docling/docling_custom.py -------------------------------------------------------------------------------- /parser/gemini/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/gemini/gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/gemini/gemini.py -------------------------------------------------------------------------------- /parser/llama-parse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/llama-parse/llama_parse_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/llama-parse/llama_parse_example.py -------------------------------------------------------------------------------- /parser/llama-vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/llama-vision/llama_vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/llama-vision/llama_vision.py -------------------------------------------------------------------------------- /parser/marker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/markitdown/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/markitdown/markitdown_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/markitdown/markitdown_pdf.py -------------------------------------------------------------------------------- /parser/mistral_ocr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/mistral_ocr/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/mistral_ocr/mistral.py -------------------------------------------------------------------------------- /parser/openai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/openai/openai_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/openai/openai_.py -------------------------------------------------------------------------------- /parser/pdfminer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/pdfminer/lc_pdfminer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/pdfminer/lc_pdfminer.py -------------------------------------------------------------------------------- /parser/pdfplumber/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/pdfplumber/lc_pdfplumber.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/pdfplumber/lc_pdfplumber.py -------------------------------------------------------------------------------- /parser/pymupdf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/pymupdf/lc_pymupdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/pymupdf/lc_pymupdf.py -------------------------------------------------------------------------------- /parser/pypdf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/pypdf/lc_pypdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/pypdf/lc_pypdf.py -------------------------------------------------------------------------------- /parser/pypdfdirectory/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/pypdfdirectory/lc_pypdfdirectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/pypdfdirectory/lc_pypdfdirectory.py -------------------------------------------------------------------------------- /parser/pypdfium/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/pypdfium/lc_pypdfium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/pypdfium/lc_pypdfium.py -------------------------------------------------------------------------------- /parser/unstructured-io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/unstructured-io/lc_unstructured-io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/parser/unstructured-io/lc_unstructured-io.py -------------------------------------------------------------------------------- /pdf-parsing-guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/pdf-parsing-guide.pdf -------------------------------------------------------------------------------- /pdf_parser_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/pdf_parser_app.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/requirements.txt -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/pdf_to_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/utils/pdf_to_image.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/utils/utils.py -------------------------------------------------------------------------------- /vlm_ocr/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/README.md -------------------------------------------------------------------------------- /vlm_ocr/anthropic/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/anthropic/main.py -------------------------------------------------------------------------------- /vlm_ocr/gemini/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/gemini/main.py -------------------------------------------------------------------------------- /vlm_ocr/mistral_ocr/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/mistral_ocr/main.py -------------------------------------------------------------------------------- /vlm_ocr/msft_kosmos_2.5/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/msft_kosmos_2.5/main.py -------------------------------------------------------------------------------- /vlm_ocr/ollama_models/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/ollama_models/main.py -------------------------------------------------------------------------------- /vlm_ocr/omniai/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/omniai/main.py -------------------------------------------------------------------------------- /vlm_ocr/openai/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/openai/main.py -------------------------------------------------------------------------------- /vlm_ocr/smol_docling/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr/smol_docling/main.py -------------------------------------------------------------------------------- /vlm_ocr_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genieincodebottle/parsemypdf/HEAD/vlm_ocr_app.py --------------------------------------------------------------------------------