├── .github └── workflows │ ├── claude-code-review.yml │ └── claude.yml ├── .gitignore ├── CLAUDE.md ├── LICENSE ├── README.md ├── docstrange ├── WEB_INTERFACE.md ├── __init__.py ├── cli.py ├── config.py ├── exceptions.py ├── extractor.py ├── pipeline │ ├── __init__.py │ ├── layout_detector.py │ ├── model_downloader.py │ ├── nanonets_processor.py │ ├── neural_document_processor.py │ └── ocr_service.py ├── processors │ ├── __init__.py │ ├── base.py │ ├── cloud_processor.py │ ├── docx_processor.py │ ├── excel_processor.py │ ├── gpu_processor.py │ ├── html_processor.py │ ├── image_processor.py │ ├── pdf_processor.py │ ├── pptx_processor.py │ ├── txt_processor.py │ └── url_processor.py ├── result.py ├── services │ ├── __init__.py │ ├── auth_service.py │ └── ollama_service.py ├── static │ ├── logo_clean.png │ ├── script.js │ └── styles.css ├── templates │ └── index.html ├── utils │ ├── __init__.py │ └── gpu_utils.py └── web_app.py ├── example.py ├── examples └── test.py ├── mcp_server_module ├── README.md ├── __init__.py ├── __main__.py ├── claude_desktop_config.json └── server.py ├── pyproject.toml ├── scripts ├── README.md ├── __init__.py ├── prepare_s3_models.py ├── setup_dev.py └── setup_environment.sh └── tests ├── debug_ocr.py ├── debug_ocr_provider.py ├── test_advanced_ocr.py ├── test_cloud_mode.py ├── test_converter.py ├── test_enhanced_layout.py ├── test_enhanced_library.py ├── test_enhanced_pdf_processor.py ├── test_html_generation.py ├── test_json_structure.py ├── test_ocr_with_real_image.py ├── test_real_files.py ├── test_real_files_enhanced.py └── test_real_json_conversion.py /.github/workflows/claude-code-review.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/.github/workflows/claude-code-review.yml -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/.github/workflows/claude.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/.gitignore -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/CLAUDE.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/README.md -------------------------------------------------------------------------------- /docstrange/WEB_INTERFACE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/WEB_INTERFACE.md -------------------------------------------------------------------------------- /docstrange/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/__init__.py -------------------------------------------------------------------------------- /docstrange/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/cli.py -------------------------------------------------------------------------------- /docstrange/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/config.py -------------------------------------------------------------------------------- /docstrange/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/exceptions.py -------------------------------------------------------------------------------- /docstrange/extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/extractor.py -------------------------------------------------------------------------------- /docstrange/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | """Pipeline package for document processing and OCR.""" -------------------------------------------------------------------------------- /docstrange/pipeline/layout_detector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/pipeline/layout_detector.py -------------------------------------------------------------------------------- /docstrange/pipeline/model_downloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/pipeline/model_downloader.py -------------------------------------------------------------------------------- /docstrange/pipeline/nanonets_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/pipeline/nanonets_processor.py -------------------------------------------------------------------------------- /docstrange/pipeline/neural_document_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/pipeline/neural_document_processor.py -------------------------------------------------------------------------------- /docstrange/pipeline/ocr_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/pipeline/ocr_service.py -------------------------------------------------------------------------------- /docstrange/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/__init__.py -------------------------------------------------------------------------------- /docstrange/processors/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/base.py -------------------------------------------------------------------------------- /docstrange/processors/cloud_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/cloud_processor.py -------------------------------------------------------------------------------- /docstrange/processors/docx_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/docx_processor.py -------------------------------------------------------------------------------- /docstrange/processors/excel_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/excel_processor.py -------------------------------------------------------------------------------- /docstrange/processors/gpu_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/gpu_processor.py -------------------------------------------------------------------------------- /docstrange/processors/html_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/html_processor.py -------------------------------------------------------------------------------- /docstrange/processors/image_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/image_processor.py -------------------------------------------------------------------------------- /docstrange/processors/pdf_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/pdf_processor.py -------------------------------------------------------------------------------- /docstrange/processors/pptx_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/pptx_processor.py -------------------------------------------------------------------------------- /docstrange/processors/txt_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/txt_processor.py -------------------------------------------------------------------------------- /docstrange/processors/url_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/processors/url_processor.py -------------------------------------------------------------------------------- /docstrange/result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/result.py -------------------------------------------------------------------------------- /docstrange/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/services/__init__.py -------------------------------------------------------------------------------- /docstrange/services/auth_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/services/auth_service.py -------------------------------------------------------------------------------- /docstrange/services/ollama_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/services/ollama_service.py -------------------------------------------------------------------------------- /docstrange/static/logo_clean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/static/logo_clean.png -------------------------------------------------------------------------------- /docstrange/static/script.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/static/script.js -------------------------------------------------------------------------------- /docstrange/static/styles.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/static/styles.css -------------------------------------------------------------------------------- /docstrange/templates/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/templates/index.html -------------------------------------------------------------------------------- /docstrange/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/utils/__init__.py -------------------------------------------------------------------------------- /docstrange/utils/gpu_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/utils/gpu_utils.py -------------------------------------------------------------------------------- /docstrange/web_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/docstrange/web_app.py -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/example.py -------------------------------------------------------------------------------- /examples/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/examples/test.py -------------------------------------------------------------------------------- /mcp_server_module/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/mcp_server_module/README.md -------------------------------------------------------------------------------- /mcp_server_module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/mcp_server_module/__init__.py -------------------------------------------------------------------------------- /mcp_server_module/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/mcp_server_module/__main__.py -------------------------------------------------------------------------------- /mcp_server_module/claude_desktop_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/mcp_server_module/claude_desktop_config.json -------------------------------------------------------------------------------- /mcp_server_module/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/mcp_server_module/server.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/scripts/README.md -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/prepare_s3_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/scripts/prepare_s3_models.py -------------------------------------------------------------------------------- /scripts/setup_dev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/scripts/setup_dev.py -------------------------------------------------------------------------------- /scripts/setup_environment.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/scripts/setup_environment.sh -------------------------------------------------------------------------------- /tests/debug_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/debug_ocr.py -------------------------------------------------------------------------------- /tests/debug_ocr_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/debug_ocr_provider.py -------------------------------------------------------------------------------- /tests/test_advanced_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_advanced_ocr.py -------------------------------------------------------------------------------- /tests/test_cloud_mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_cloud_mode.py -------------------------------------------------------------------------------- /tests/test_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_converter.py -------------------------------------------------------------------------------- /tests/test_enhanced_layout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_enhanced_layout.py -------------------------------------------------------------------------------- /tests/test_enhanced_library.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_enhanced_library.py -------------------------------------------------------------------------------- /tests/test_enhanced_pdf_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_enhanced_pdf_processor.py -------------------------------------------------------------------------------- /tests/test_html_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_html_generation.py -------------------------------------------------------------------------------- /tests/test_json_structure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_json_structure.py -------------------------------------------------------------------------------- /tests/test_ocr_with_real_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_ocr_with_real_image.py -------------------------------------------------------------------------------- /tests/test_real_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_real_files.py -------------------------------------------------------------------------------- /tests/test_real_files_enhanced.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_real_files_enhanced.py -------------------------------------------------------------------------------- /tests/test_real_json_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanoNets/docstrange/HEAD/tests/test_real_json_conversion.py --------------------------------------------------------------------------------