├── .dockerignore ├── .env.template ├── .gitignore ├── .python-version ├── Dockerfile ├── README.md ├── compose.yml ├── config ├── credit-ocr-system.conf └── document_types.conf ├── data └── loan_application.pdf ├── database ├── README.md └── schemas │ └── schema.sql ├── docs └── imgs │ ├── 2-ocr-text-extraction-2.png │ ├── 2-ocr-text-extraction-3.png │ ├── 2-ocr-text-extraction.png │ ├── 3-llm-field-extraction-2.png │ ├── 3-llm-field-extraction.png │ ├── 4-function-integration.png │ ├── 6-data-flow-architecture.png │ ├── 7-fast-api-service-architecture.png │ ├── 9-application-ui.png │ └── architecture.png ├── notebooks ├── 1-setup │ ├── 01_setup.ipynb │ └── README.md ├── 2-ocr-based-text-extraction │ ├── 02_ocr_text_extraction.ipynb │ └── README.md ├── 3-llm-field-extraction │ ├── 03_llm_field_extraction.ipynb │ └── README.md ├── 4-function-integration │ ├── 04_integration.ipynb │ └── README.md ├── 5-dms-upload │ ├── 05_dms_upload.ipynb │ └── README.md ├── 6-document-processing-status │ ├── 06_document_processing_status.ipynb │ └── README.md ├── 7-async-processing │ ├── 07_async_processing.ipynb │ └── README.md ├── 8-api-service │ ├── 08_api_service.ipynb │ └── README.md └── 9-application-setup │ ├── README.md │ └── setup.ipynb ├── pyproject.toml ├── run_api.py ├── src ├── __init__.py ├── api │ ├── README.md │ ├── __init__.py │ ├── config.py │ ├── main.py │ ├── models.py │ ├── routes.py │ └── templates │ │ └── index.html ├── async_processing.py ├── celery_app.py ├── config │ ├── __init__.py │ └── system.py ├── dms │ ├── __init__.py │ ├── adapters.py │ ├── environment.py │ ├── interfaces.py │ └── service.py ├── integration │ ├── __init__.py │ ├── orchestration.py │ └── pipeline.py ├── llm │ ├── __init__.py │ ├── client.py │ ├── config.py │ ├── field_extractor.py │ └── validation.py ├── ocr │ ├── __init__.py │ ├── easyocr_client.py │ ├── label_value_extraction.py │ ├── postprocess.py │ └── spatial_analysis.py ├── storage │ ├── __init__.py │ ├── blob_operations.py │ └── storage.py ├── tasks │ ├── __init__.py │ └── pipeline_tasks.py └── visualization │ ├── __init__.py │ └── ocr_visualization.py ├── start_credit_ocr.py ├── tests ├── __init__.py ├── conftest.py ├── test_api.py ├── test_api_simple.py ├── test_dms_integration.py ├── test_integrated_pipeline.py ├── test_notebook04_integration.py ├── test_notebook04_workflow.py └── test_simple_workflow.py └── uv.lock /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/.dockerignore -------------------------------------------------------------------------------- /.env.template: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tasks.md 2 | .venv 3 | *.egg* 4 | build/ 5 | *cache* 6 | *.log -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/Dockerfile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/README.md -------------------------------------------------------------------------------- /compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/compose.yml -------------------------------------------------------------------------------- /config/credit-ocr-system.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/config/credit-ocr-system.conf -------------------------------------------------------------------------------- /config/document_types.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/config/document_types.conf -------------------------------------------------------------------------------- /data/loan_application.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/data/loan_application.pdf -------------------------------------------------------------------------------- /database/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/database/README.md -------------------------------------------------------------------------------- /database/schemas/schema.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/database/schemas/schema.sql -------------------------------------------------------------------------------- /docs/imgs/2-ocr-text-extraction-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/2-ocr-text-extraction-2.png -------------------------------------------------------------------------------- /docs/imgs/2-ocr-text-extraction-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/2-ocr-text-extraction-3.png -------------------------------------------------------------------------------- /docs/imgs/2-ocr-text-extraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/2-ocr-text-extraction.png -------------------------------------------------------------------------------- /docs/imgs/3-llm-field-extraction-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/3-llm-field-extraction-2.png -------------------------------------------------------------------------------- /docs/imgs/3-llm-field-extraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/3-llm-field-extraction.png -------------------------------------------------------------------------------- /docs/imgs/4-function-integration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/4-function-integration.png -------------------------------------------------------------------------------- /docs/imgs/6-data-flow-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/6-data-flow-architecture.png -------------------------------------------------------------------------------- /docs/imgs/7-fast-api-service-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/7-fast-api-service-architecture.png -------------------------------------------------------------------------------- /docs/imgs/9-application-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/9-application-ui.png -------------------------------------------------------------------------------- /docs/imgs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/docs/imgs/architecture.png -------------------------------------------------------------------------------- /notebooks/1-setup/01_setup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/1-setup/01_setup.ipynb -------------------------------------------------------------------------------- /notebooks/1-setup/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/1-setup/README.md -------------------------------------------------------------------------------- /notebooks/2-ocr-based-text-extraction/02_ocr_text_extraction.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/2-ocr-based-text-extraction/02_ocr_text_extraction.ipynb -------------------------------------------------------------------------------- /notebooks/2-ocr-based-text-extraction/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/2-ocr-based-text-extraction/README.md -------------------------------------------------------------------------------- /notebooks/3-llm-field-extraction/03_llm_field_extraction.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/3-llm-field-extraction/03_llm_field_extraction.ipynb -------------------------------------------------------------------------------- /notebooks/3-llm-field-extraction/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/3-llm-field-extraction/README.md -------------------------------------------------------------------------------- /notebooks/4-function-integration/04_integration.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/4-function-integration/04_integration.ipynb -------------------------------------------------------------------------------- /notebooks/4-function-integration/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/4-function-integration/README.md -------------------------------------------------------------------------------- /notebooks/5-dms-upload/05_dms_upload.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/5-dms-upload/05_dms_upload.ipynb -------------------------------------------------------------------------------- /notebooks/5-dms-upload/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/5-dms-upload/README.md -------------------------------------------------------------------------------- /notebooks/6-document-processing-status/06_document_processing_status.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/6-document-processing-status/06_document_processing_status.ipynb -------------------------------------------------------------------------------- /notebooks/6-document-processing-status/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/6-document-processing-status/README.md -------------------------------------------------------------------------------- /notebooks/7-async-processing/07_async_processing.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/7-async-processing/07_async_processing.ipynb -------------------------------------------------------------------------------- /notebooks/7-async-processing/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/7-async-processing/README.md -------------------------------------------------------------------------------- /notebooks/8-api-service/08_api_service.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/8-api-service/08_api_service.ipynb -------------------------------------------------------------------------------- /notebooks/8-api-service/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/8-api-service/README.md -------------------------------------------------------------------------------- /notebooks/9-application-setup/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/9-application-setup/README.md -------------------------------------------------------------------------------- /notebooks/9-application-setup/setup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/notebooks/9-application-setup/setup.ipynb -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/pyproject.toml -------------------------------------------------------------------------------- /run_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/run_api.py -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/api/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/api/README.md -------------------------------------------------------------------------------- /src/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/api/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/api/config.py -------------------------------------------------------------------------------- /src/api/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/api/main.py -------------------------------------------------------------------------------- /src/api/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/api/models.py -------------------------------------------------------------------------------- /src/api/routes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/api/routes.py -------------------------------------------------------------------------------- /src/api/templates/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/api/templates/index.html -------------------------------------------------------------------------------- /src/async_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/async_processing.py -------------------------------------------------------------------------------- /src/celery_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/celery_app.py -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/config/__init__.py -------------------------------------------------------------------------------- /src/config/system.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/config/system.py -------------------------------------------------------------------------------- /src/dms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/dms/__init__.py -------------------------------------------------------------------------------- /src/dms/adapters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/dms/adapters.py -------------------------------------------------------------------------------- /src/dms/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/dms/environment.py -------------------------------------------------------------------------------- /src/dms/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/dms/interfaces.py -------------------------------------------------------------------------------- /src/dms/service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/dms/service.py -------------------------------------------------------------------------------- /src/integration/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/integration/orchestration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/integration/orchestration.py -------------------------------------------------------------------------------- /src/integration/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/integration/pipeline.py -------------------------------------------------------------------------------- /src/llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llm/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/llm/client.py -------------------------------------------------------------------------------- /src/llm/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/llm/config.py -------------------------------------------------------------------------------- /src/llm/field_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/llm/field_extractor.py -------------------------------------------------------------------------------- /src/llm/validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/llm/validation.py -------------------------------------------------------------------------------- /src/ocr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ocr/easyocr_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/ocr/easyocr_client.py -------------------------------------------------------------------------------- /src/ocr/label_value_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/ocr/label_value_extraction.py -------------------------------------------------------------------------------- /src/ocr/postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/ocr/postprocess.py -------------------------------------------------------------------------------- /src/ocr/spatial_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/ocr/spatial_analysis.py -------------------------------------------------------------------------------- /src/storage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/storage/blob_operations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/storage/blob_operations.py -------------------------------------------------------------------------------- /src/storage/storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/storage/storage.py -------------------------------------------------------------------------------- /src/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/tasks/pipeline_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/tasks/pipeline_tasks.py -------------------------------------------------------------------------------- /src/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/visualization/ocr_visualization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/src/visualization/ocr_visualization.py -------------------------------------------------------------------------------- /start_credit_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/start_credit_ocr.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/tests/test_api.py -------------------------------------------------------------------------------- /tests/test_api_simple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/tests/test_api_simple.py -------------------------------------------------------------------------------- /tests/test_dms_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/tests/test_dms_integration.py -------------------------------------------------------------------------------- /tests/test_integrated_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/tests/test_integrated_pipeline.py -------------------------------------------------------------------------------- /tests/test_notebook04_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/tests/test_notebook04_integration.py -------------------------------------------------------------------------------- /tests/test_notebook04_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/tests/test_notebook04_workflow.py -------------------------------------------------------------------------------- /tests/test_simple_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/tests/test_simple_workflow.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markuskuehnle/credit-ocr-system/HEAD/uv.lock --------------------------------------------------------------------------------