├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE ├── README.md ├── docker-compose.yml ├── docs ├── architecture.png ├── create-case.png ├── flow.gif └── search-case.png ├── no-ocr-api ├── .env.example ├── .gitattributes ├── Dockerfile ├── data │ ├── AI-Index-Report-2024.pdf │ └── InfraRed-Report.pdf ├── np_ocr │ ├── __init__.py │ ├── api.py │ ├── data.py │ └── search.py ├── requirements.txt └── tests │ ├── lance_vs_qdrant.py │ ├── mock_colpali.py │ └── test_api.py ├── no-ocr-llms ├── llm_serving.py ├── llm_serving_colpali.py └── llm_serving_load_models.py ├── no-ocr-ui ├── .bolt │ ├── config.json │ └── prompt ├── .env.example ├── .gitignore ├── Dockerfile ├── eslint.config.js ├── index.html ├── package-lock.json ├── package.json ├── postcss.config.js ├── src │ ├── App.tsx │ ├── components │ │ ├── About.tsx │ │ ├── Case.tsx │ │ ├── CreateCase.tsx │ │ ├── Navbar.tsx │ │ ├── Search.tsx │ │ ├── about │ │ │ ├── Feature.tsx │ │ │ ├── Features.tsx │ │ │ ├── Hero.tsx │ │ │ └── HowItWorks.tsx │ │ ├── auth │ │ │ ├── AuthGuard.tsx │ │ │ ├── LoginForm.tsx │ │ │ ├── LogoutButton.tsx │ │ │ └── RegisterForm.tsx │ │ ├── collection │ │ │ ├── FileUpload.tsx │ │ │ └── UploadProgress.tsx │ │ ├── collections │ │ │ ├── CaseCard.tsx │ │ │ └── CaseList.tsx │ │ ├── layout │ │ │ ├── NavLink.tsx │ │ │ └── Navbar.tsx │ │ ├── search │ │ │ ├── CollectionSelect.tsx │ │ │ └── SearchResults.tsx │ │ └── shared │ │ │ ├── EmptyState.tsx │ │ │ └── LoadingSpinner.tsx │ ├── config │ │ ├── api.ts │ │ └── supabase.ts │ ├── env.d.ts │ ├── hooks │ │ └── useAuth.ts │ ├── index.css │ ├── lib │ │ └── supabase.ts │ ├── main.tsx │ ├── stores │ │ └── authStore.ts │ ├── types │ │ ├── auth.ts │ │ ├── collection.ts │ │ └── index.ts │ ├── utils │ │ └── date.ts │ └── vite-env.d.ts ├── tailwind.config.js ├── tsconfig.app.json ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts └── pyproject.toml /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Create a report to help us improve 4 | title: '[BUG] ' 5 | labels: bug 6 | assignees: '' 7 | --- 8 | 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **To Reproduce** 13 | Steps to reproduce the behavior: 14 | 1. Go to '...' 15 | 2. Click on '....' 16 | 3. Scroll down to '....' 17 | 4. See error 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | **Environment (please complete the following information):** 26 | - OS: [e.g. iOS] 27 | - Browser: [e.g. chrome, safari] 28 | - Version: [e.g. 22] 29 | 30 | **Additional context** 31 | Add any other context about the problem here. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '[FEATURE]' 5 | labels: enhancement 6 | assignees: '' 7 | --- 8 | 9 | **Is your feature request related to a problem? Please describe.** 10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 11 | 12 | **Describe the solution you'd like** 13 | A clear and concise description of what you want to happen. 
14 | 15 | **Describe alternatives you've considered** 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | **Additional context** 19 | Add any other context or screenshots about the feature request here. -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - no-ocr-dev 8 | pull_request: 9 | branches: 10 | - main 11 | release: 12 | types: [published] 13 | 14 | jobs: 15 | docker-build: 16 | runs-on: ubuntu-latest 17 | permissions: 18 | contents: read 19 | packages: write 20 | steps: 21 | 22 | - name: Checkout repository 23 | uses: actions/checkout@v4 24 | 25 | - name: Set up Docker Buildx 26 | uses: docker/setup-buildx-action@v2 27 | 28 | - name: Log in to the Container registry 29 | uses: docker/login-action@v3 30 | with: 31 | registry: ghcr.io 32 | username: ${{ github.actor }} 33 | password: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | 36 | - name: Build and push docker image UI 37 | uses: docker/build-push-action@v6 38 | with: 39 | context: no-ocr-ui 40 | push: true 41 | tags: ghcr.io/kyryl-opens-ml/no-ocr-ui:latest 42 | build-args: | 43 | VITE_SUPABASE_URL=${{ secrets.VITE_SUPABASE_URL }} 44 | VITE_SUPABASE_ANON_KEY=${{ secrets.VITE_SUPABASE_ANON_KEY }} 45 | VITE_REACT_APP_API_URI=${{ secrets.VITE_REACT_APP_API_URI }} 46 | cache-from: type=registry,ref=ghcr.io/kyryl-opens-ml/no-ocr-ui:buildcache 47 | cache-to: type=registry,ref=ghcr.io/kyryl-opens-ml/no-ocr-ui:buildcache,mode=max 48 | 49 | - name: Build and push docker image API 50 | uses: docker/build-push-action@v6 51 | with: 52 | context: no-ocr-api 53 | push: true 54 | tags: ghcr.io/kyryl-opens-ml/no-ocr-api:latest 55 | cache-from: type=registry,ref=ghcr.io/kyryl-opens-ml/no-ocr-api:buildcache 56 | cache-to: type=registry,ref=ghcr.io/kyryl-opens-ml/no-ocr-api:buildcache,mode=max 57 | 58 | deploy: 59 | runs-on: ubuntu-latest 60 | needs: [docker-build] 61 | steps: 62 | - name: Checkout 63 | uses: actions/checkout@v4 64 | 65 | - name: Install Railway 66 | run: rm -rf package-lock.json && npm i -g @railway/cli 67 | 68 | - name: Deploy UI 69 | run: railway redeploy --service no-ocr-ui --yes 70 | env: 71 | RAILWAY_TOKEN: ${{ secrets.RAILWAY_TOKEN }} 72 | 73 | - name: Deploy API 74 | run: railway redeploy --service no-ocr-api --yes 75 | env: 76 | RAILWAY_TOKEN: ${{ secrets.RAILWAY_TOKEN }} 77 | 78 | docker-build-release: 79 | runs-on: ubuntu-latest 80 | if: github.event_name == 'release' 81 | steps: 82 | - name: Checkout repository 83 | uses: actions/checkout@v4 84 | 85 | - name: Set up Docker Buildx 86 | uses: docker/setup-buildx-action@v2 87 | 88 | - name: Log in to the Container registry 89 | uses: docker/login-action@v3 90 | with: 91 | registry: ghcr.io 92 | username: ${{ github.actor }} 93 | password: ${{ secrets.GITHUB_TOKEN }} 94 | 95 | - name: Build and push docker image UI with release tag 96 | uses: docker/build-push-action@v6 97 | with: 98 | context: no-ocr-ui 99 | push: true 100 | tags: ghcr.io/kyryl-opens-ml/no-ocr-ui:${{ github.event.release.tag_name }} 101 | build-args: | 102 | VITE_SUPABASE_URL=${{ secrets.VITE_SUPABASE_URL }} 103 | VITE_SUPABASE_ANON_KEY=${{ secrets.VITE_SUPABASE_ANON_KEY }} 104 | VITE_REACT_APP_API_URI=${{ secrets.VITE_REACT_APP_API_URI }} 105 | cache-from: type=registry,ref=ghcr.io/kyryl-opens-ml/no-ocr-ui:buildcache 106 | cache-to: 
type=registry,ref=ghcr.io/kyryl-opens-ml/no-ocr-ui:buildcache,mode=max 107 | 108 | - name: Build and push docker image API with release tag 109 | uses: docker/build-push-action@v6 110 | with: 111 | context: no-ocr-api 112 | push: true 113 | tags: ghcr.io/kyryl-opens-ml/no-ocr-api:${{ github.event.release.tag_name }} 114 | cache-from: type=registry,ref=ghcr.io/kyryl-opens-ml/no-ocr-api:buildcache 115 | cache-to: type=registry,ref=ghcr.io/kyryl-opens-ml/no-ocr-api:buildcache,mode=max -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | README.p.md 164 | colpali/ 165 | data/ 166 | .DS_Store 167 | no-ocr-api/storage 168 | example/ 169 | no-ocr-api/vllm_cache/ 170 | RDEV.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # No OCR 2 | 3 | A simple tool for exploring documents with AI, no fancy text extraction required. Just upload your files, then quickly search or ask questions about content across multiple collections. 
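If you prefer to poke at the backend directly, here is a minimal sketch of the two core endpoints (`/create_case` and `/search`) using `curl`. It assumes the API is running locally on port 8000 (as wired up in `docker-compose.yml`); the user ID, case name, and PDF path are placeholders.

```bash
# Create a case from one of the bundled sample PDFs; processing runs as a background task
curl -X POST http://localhost:8000/create_case \
  -F "user_id=demo-user" \
  -F "case_name=reports" \
  -F "files=@no-ocr-api/data/AI-Index-Report-2024.pdf"

# Once the case status is "done", run a text query against it
curl -X POST http://localhost:8000/search \
  -F "user_query=What are the key findings?" \
  -F "user_id=demo-user" \
  -F "case_name=reports"
```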
4 | 5 | ## Release blog with details 6 | 7 | Read the release blog post for the full project details: [No-OCR Product](https://kyrylai.com/2025/01/10/no-ocr-product/) 8 | 9 | ## Demo 10 | 11 | Here's a quick GIF demonstrating the basic flow of using No OCR: 12 | 13 | ![No OCR Flow](./docs/flow.gif) 14 | 15 | > **Table of Contents** 16 | > 1. [Overview](#overview) 17 | > 2. [Key Features](#key-features) 18 | > 3. [Architecture](#architecture) 19 | > 4. [Flow](#flow) 20 | > 5. [Roadmap](#roadmap) 21 | > 6. [Prerequisites](#prerequisites) 22 | > 7. [Dev Installation](#dev-installation) 23 | 24 | ## Overview 25 | 26 | The core purpose of "No OCR" is to simplify AI-based PDF processing: 27 | - Process and store PDF pages without relying on OCR. 28 | - Perform text and/or visual queries using modern embeddings. 29 | - Use open source models for advanced question-answering on document-based diagrams, text, and more. 30 | 31 | ## Key Features 32 | 33 | - Create and manage PDF/document collections, also referred to as "cases". 34 | - Automated ingestion to build Hugging Face-style datasets (HF_Dataset). 35 | - Vector-based search over PDF pages (and relevant images) in LanceDB. 36 | - Visual question-answering on images and diagrams via Qwen2-VL. 37 | - Deployable via Docker for both the backend (Python) and UI (React). 38 | 39 | ## Architecture 40 | 41 | Below is a high-level workflow overview: 42 | 43 | ![Architecture](./docs/architecture.png) 44 | 45 | ## Flow 46 | 47 | Create case: 48 | 49 | ```mermaid 50 | sequenceDiagram 51 | participant User 52 | participant no-ocr-ui (CreateCase) 53 | participant no-ocr-api 54 | participant HF_Dataset 55 | participant SearchClient 56 | participant LanceDB 57 | 58 | User->>no-ocr-ui (CreateCase): Upload PDFs & specify case name 59 | no-ocr-ui (CreateCase)->>no-ocr-api: POST /create_case with PDFs 60 | no-ocr-api->>no-ocr-api: Save PDFs to local storage 61 | no-ocr-api->>no-ocr-api: Spawn background task (process_case) 62 | no-ocr-api->>HF_Dataset: Convert PDFs to HF dataset 63 | HF_Dataset-->>no-ocr-api: Return dataset 64 | no-ocr-api->>SearchClient: Ingest dataset 65 | SearchClient->>LanceDB: Create collection & upload points 66 | LanceDB-->>SearchClient: Acknowledge ingestion 67 | SearchClient-->>no-ocr-api: Done ingestion 68 | no-ocr-api->>no-ocr-api: Mark case status as 'done' 69 | no-ocr-api-->>no-ocr-ui (CreateCase): Return creation response 70 | no-ocr-ui (CreateCase)-->>User: Display success message 71 | ``` 72 | 73 | Search: 74 | 75 | ```mermaid 76 | sequenceDiagram 77 | participant User 78 | participant no-ocr-ui 79 | participant SearchClient 80 | participant LanceDB 81 | participant HF_Dataset 82 | participant VLLM 83 | 84 | User->>no-ocr-ui: Enter search query and select case 85 | no-ocr-ui->>SearchClient: Search images by text 86 | SearchClient->>LanceDB: Query collection with text embedding 87 | LanceDB-->>SearchClient: Return search results 88 | SearchClient-->>no-ocr-ui: Provide search results 89 | no-ocr-ui->>HF_Dataset: Load dataset for collection 90 | HF_Dataset-->>no-ocr-ui: Return dataset 91 | no-ocr-ui->>VLLM: Process images with VLLM 92 | VLLM-->>no-ocr-ui: Return VLLM output 93 | no-ocr-ui-->>User: Display search results and VLLM output 94 | ``` 95 | 96 | ## Roadmap 97 | 98 | - Better models for reasoning and retrieval (72B and QVQ). 99 | - Agentic workflows - go beyond search toward completing whole pieces of work. 100 | - Training models per case - turn your workflow into a data moat and train unique models.
101 | - UI/UX improvement - simplify, simplify, simplify. 102 | 103 | 104 | ## Prerequisites 105 | - Python 3.x 106 | - Node.js 18.x 107 | - Docker (optional for containerized deployments) 108 | - Supabase 109 | - Create an account at https://app.supabase.io/ 110 | - Create a `.env` file in the `no-ocr-ui` directory 111 | - Add the following variables to the `.env` file: 112 | ``` 113 | VITE_SUPABASE_URL="" 114 | VITE_SUPABASE_ANON_KEY="" 115 | VITE_REACT_APP_API_URI="" 116 | ``` 117 | - Modal 118 | - Create an account at https://modal.com/ 119 | - Deploy models: 120 | ```bash 121 | pip install modal 122 | modal setup 123 | 124 | modal run no-ocr-llms/llm_serving_load_models.py --model-name Qwen/Qwen2-VL-7B-Instruct --model-revision 51c47430f97dd7c74aa1fa6825e68a813478097f 125 | modal run no-ocr-llms/llm_serving_load_models.py --model-name vidore/colqwen2-v1.0-merged --model-revision 364a4f5df97231e233e15cbbaf0b9dbe352ba92c 126 | 127 | 128 | modal deploy no-ocr-llms/llm_serving.py 129 | modal deploy no-ocr-llms/llm_serving_colpali.py 130 | ``` 131 | - Create a `.env` file in the `no-ocr-api` directory 132 | - Update the environment variables. 133 | 134 | ## Dev Installation 135 | 136 | 1. Clone the repository: 137 | ```bash 138 | git clone https://github.com/kyryl-opens-ml/no-ocr 139 | ``` 140 | 141 | 2. (API) Install dependencies: 142 | ```bash 143 | cd no-ocr-api 144 | pip install -r requirements.txt 145 | ``` 146 | 147 | 3. (API) Run the server: 148 | ```bash 149 | cd no-ocr-api 150 | fastapi dev api.py 151 | ``` 152 | 153 | 4. (UI) Install dependencies: 154 | ```bash 155 | cd no-ocr-ui 156 | npm install 157 | ``` 158 | 5. (UI) Run the UI: 159 | ```bash 160 | cd no-ocr-ui 161 | npm run dev 162 | ``` 163 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | ui: 5 | build: 6 | context: ./no-ocr-ui 7 | dockerfile: Dockerfile 8 | args: 9 | VITE_SUPABASE_URL: "" 10 | VITE_SUPABASE_ANON_KEY: "" 11 | VITE_REACT_APP_API_URI: "http://localhost:8000" 12 | env_file: 13 | - ./no-ocr-ui/.env 14 | ports: 15 | - "5173:5173" 16 | depends_on: 17 | - api 18 | 19 | api: 20 | build: 21 | context: ./no-ocr-api 22 | dockerfile: Dockerfile 23 | env_file: 24 | - ./no-ocr-api/.env 25 | volumes: 26 | - api-storage:/app/storage 27 | ports: 28 | - "8000:8000" 29 | depends_on: 30 | - qdrant 31 | environment: 32 | QDRANT_HOST: "qdrant" 33 | 34 | qdrant: 35 | image: qdrant/qdrant:v1.12.5 36 | volumes: 37 | - qdrant-storage:/qdrant/storage 38 | ports: 39 | - "6333:6333" 40 | 41 | volumes: 42 | api-storage: 43 | qdrant-storage: 44 | -------------------------------------------------------------------------------- /docs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyryl-opens-ml/no-ocr/5090ddb47335e3df2529d152f86eec49d3634cff/docs/architecture.png -------------------------------------------------------------------------------- /docs/create-case.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyryl-opens-ml/no-ocr/5090ddb47335e3df2529d152f86eec49d3634cff/docs/create-case.png -------------------------------------------------------------------------------- /docs/flow.gif: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/kyryl-opens-ml/no-ocr/5090ddb47335e3df2529d152f86eec49d3634cff/docs/flow.gif -------------------------------------------------------------------------------- /docs/search-case.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyryl-opens-ml/no-ocr/5090ddb47335e3df2529d152f86eec49d3634cff/docs/search-case.png -------------------------------------------------------------------------------- /no-ocr-api/.env.example: -------------------------------------------------------------------------------- 1 | STORAGE_DIR="storage" 2 | CASE_INFO_FILENAME="case_info.json" 3 | HF_DATASET_DIRNAME="hf_dataset" 4 | SEARCH_TOP_K=3 5 | COLPALI_TOKEN= 6 | VLLM_URL= 7 | COLPALI_BASE_URL= 8 | VECTOR_SIZE=128 9 | -------------------------------------------------------------------------------- /no-ocr-api/.gitattributes: -------------------------------------------------------------------------------- 1 | data/AI-Index-Report-2024.pdf filter=lfs diff=lfs merge=lfs -text 2 | data/InfraRed-Report.pdf filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /no-ocr-api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | ARG DEBIAN_FRONTEND=noninteractive 3 | 4 | ENV TZ=America/Los_Angeles 5 | 6 | RUN apt-get update && apt-get install -y --no-install-recommends \ 7 | build-essential \ 8 | ffmpeg \ 9 | git \ 10 | git-lfs \ 11 | poppler-utils 12 | 13 | WORKDIR /app 14 | 15 | RUN pip install --upgrade pip 16 | COPY requirements.txt requirements.txt 17 | RUN pip install -r requirements.txt 18 | 19 | # TODO: replace with lancedb==0.18.1b1 20 | RUN pip install --pre --extra-index-url https://pypi.fury.io/lancedb/ lancedb==0.18.1b1 21 | 22 | COPY . . 
23 | ENV PYTHONPATH /app/ 24 | 25 | CMD fastapi run --host 0.0.0.0 --port 8000 --workers 1 np_ocr/api.py 26 | -------------------------------------------------------------------------------- /no-ocr-api/data/AI-Index-Report-2024.pdf: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ab78e4b640fe7485ed2b6462605d84247ab3fa53dc8b75f3e8ccfc5f24171d74 3 | size 44124954 4 | -------------------------------------------------------------------------------- /no-ocr-api/data/InfraRed-Report.pdf: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:569c84913f06887f2b147b0f229ffc859f37177fcb5c16cc9e35bfa836f08ffa 3 | size 15479939 4 | -------------------------------------------------------------------------------- /no-ocr-api/np_ocr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kyryl-opens-ml/no-ocr/5090ddb47335e3df2529d152f86eec49d3634cff/no-ocr-api/np_ocr/__init__.py -------------------------------------------------------------------------------- /no-ocr-api/np_ocr/api.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import logging 4 | import os 5 | import shutil 6 | import time 7 | from io import BytesIO 8 | from pathlib import Path 9 | from typing import List 10 | 11 | import diskcache as dc 12 | from datasets import load_from_disk 13 | from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile 14 | from fastapi.middleware.cors import CORSMiddleware 15 | from pydantic import BaseModel 16 | from pydantic_settings import BaseSettings 17 | 18 | from np_ocr.data import pdfs_to_hf_dataset 19 | from np_ocr.search import SearchClient, call_vllm 20 | 21 | 22 | class CustomRailwayLogFormatter(logging.Formatter): 23 | def format(self, record): 24 | log_record = { 25 | "time": self.formatTime(record), 26 | "level": record.levelname, 27 | "message": record.getMessage() 28 | } 29 | return json.dumps(log_record) 30 | 31 | def get_logger(): 32 | logger = logging.getLogger() 33 | logger.setLevel(logging.INFO) 34 | handler = logging.StreamHandler() 35 | for handler in logger.handlers[:]: 36 | logger.removeHandler(handler) 37 | formatter = CustomRailwayLogFormatter() 38 | handler.setFormatter(formatter) 39 | logger.addHandler(handler) 40 | return logger 41 | 42 | logger = get_logger() 43 | 44 | app = FastAPI() 45 | 46 | app.add_middleware( 47 | CORSMiddleware, 48 | allow_origins=["*"], 49 | allow_credentials=True, 50 | allow_methods=["*"], 51 | allow_headers=["*"], 52 | ) 53 | 54 | 55 | class Settings(BaseSettings): 56 | STORAGE_DIR: str = "storage" 57 | CASE_INFO_FILENAME: str = "case_info.json" 58 | HF_DATASET_DIRNAME: str = "hf_dataset" 59 | SEARCH_TOP_K: int = 3 60 | COLPALI_TOKEN: str 61 | VLLM_URL: str 62 | COLPALI_BASE_URL: str 63 | VECTOR_SIZE: int = 128 64 | VLLM_API_KEY: str 65 | VLLM_MODEL: str = "Qwen2-VL-7B-Instruct" 66 | 67 | class Config: 68 | env_file = ".env" 69 | 70 | 71 | settings = Settings() 72 | 73 | 74 | class SearchResult(BaseModel): 75 | score: float 76 | pdf_name: str 77 | pdf_page: int 78 | image_base64: str 79 | 80 | class SearchResponse(BaseModel): 81 | search_results: List[SearchResult] 82 | 83 | class ImageAnswer(BaseModel): 84 | answer: str 85 | 86 | class CaseInfo(BaseModel): 87 | name: str 88 | status: str 89 | 
number_of_PDFs: int 90 | files: List[str] 91 | case_dir: Path 92 | 93 | def save(self): 94 | with open(self.case_dir / settings.CASE_INFO_FILENAME, "w") as json_file: 95 | json.dump(self.model_dump(), json_file, default=str) 96 | 97 | def update_status(self, new_status: str): 98 | self.status = new_status 99 | self.save() 100 | 101 | 102 | search_client = SearchClient(storage_dir=settings.STORAGE_DIR, vector_size=settings.VECTOR_SIZE, base_url=settings.COLPALI_BASE_URL, token=settings.COLPALI_TOKEN) 103 | 104 | 105 | @app.post("/vllm_call") 106 | def vllm_call( 107 | user_query: str = Form(...), user_id: str = Form(...), case_name: str = Form(...), pdf_name: str = Form(...), pdf_page: int = Form(...) 108 | ) -> ImageAnswer: 109 | logger.info("start vllm_call") 110 | start_time = time.time() 111 | 112 | """ 113 | Given a user ID, collection name, PDF name, and PDF page number, retrieve the corresponding image 114 | from the HF dataset and call the VLLM function with this image. 115 | """ 116 | dataset_path = os.path.join(settings.STORAGE_DIR, user_id, case_name, settings.HF_DATASET_DIRNAME) 117 | if not os.path.exists(dataset_path): 118 | raise HTTPException(status_code=404, detail="Dataset for this case not found.") 119 | 120 | dataset = load_from_disk(dataset_path) 121 | image_data = None 122 | 123 | for data in dataset: 124 | if data["pdf_name"] == pdf_name and data["pdf_page"] == pdf_page: 125 | image_data = data["image"] 126 | break 127 | 128 | if image_data is None: 129 | raise HTTPException( 130 | status_code=404, detail="Image not found in the dataset for the given PDF name and page number." 131 | ) 132 | 133 | image_answer = call_vllm(image_data, user_query, settings.VLLM_URL, settings.VLLM_API_KEY, settings.VLLM_MODEL) 134 | 135 | end_time = time.time() 136 | logger.info(f"done vllm_call, total time {end_time - start_time}") 137 | 138 | return image_answer 139 | 140 | 141 | @app.post("/search", response_model=SearchResponse) 142 | def ai_search(user_query: str = Form(...), user_id: str = Form(...), case_name: str = Form(...)): 143 | logger.info("start ai_search") 144 | start_time = time.time() 145 | 146 | """ 147 | Given a user query, user ID, and case name, search relevant images in the Qdrant index 148 | and return both the results and an LLM interpretation. 
149 | """ 150 | if not os.path.exists(settings.STORAGE_DIR): 151 | raise HTTPException(status_code=404, detail="No collections found.") 152 | 153 | case_info_path = os.path.join(settings.STORAGE_DIR, user_id, case_name, settings.CASE_INFO_FILENAME) 154 | if not os.path.exists(case_info_path): 155 | raise HTTPException(status_code=404, detail="Case info not found.") 156 | 157 | with open(case_info_path, "r") as json_file: 158 | _ = json.load(json_file) # case_info is not used directly below 159 | 160 | search_results = search_client.search_images_by_text(user_query, case_name=case_name, user_id=user_id, top_k=settings.SEARCH_TOP_K) 161 | if not search_results: 162 | return {"message": "No results found."} 163 | 164 | dataset_path = os.path.join(settings.STORAGE_DIR, user_id, case_name, settings.HF_DATASET_DIRNAME) 165 | if not os.path.exists(dataset_path): 166 | raise HTTPException(status_code=404, detail="Dataset for this case not found.") 167 | 168 | dataset = load_from_disk(dataset_path) 169 | search_results_data = [] 170 | print(search_results) 171 | for point in search_results: 172 | logger.info(point) 173 | score = point['_distance'] 174 | index = point['index'] 175 | image_data = dataset[index]["image"] 176 | pdf_name = dataset[index]["pdf_name"] 177 | pdf_page = dataset[index]["pdf_page"] 178 | 179 | # Convert image to base64 string 180 | buffered = BytesIO() 181 | image_data.save(buffered, format="JPEG") 182 | img_b64_str = base64.b64encode(buffered.getvalue()).decode("utf-8") 183 | 184 | search_results_data.append(SearchResult( 185 | score=score, 186 | pdf_name=pdf_name, 187 | pdf_page=pdf_page, 188 | image_base64=img_b64_str 189 | )) 190 | 191 | end_time = time.time() 192 | logger.info(f"done ai_search, total time {end_time - start_time}") 193 | 194 | return SearchResponse(search_results=search_results_data) 195 | 196 | 197 | def process_case(case_info: CaseInfo, user_id: str): 198 | logger.info("start post_process_case") 199 | start_time = time.time() 200 | 201 | dataset = pdfs_to_hf_dataset(case_info.case_dir) 202 | dataset.save_to_disk(case_info.case_dir / settings.HF_DATASET_DIRNAME) 203 | search_client.ingest(case_info.name, dataset, user_id) 204 | 205 | case_info.update_status("done") 206 | 207 | end_time = time.time() 208 | logger.info(f"done process_case, total time {end_time - start_time}") 209 | 210 | 211 | 212 | @app.post("/create_case") 213 | def create_new_case( 214 | user_id: str = Form(...), 215 | files: List[UploadFile] = File(...), 216 | case_name: str = Form(...), 217 | background_tasks: BackgroundTasks = BackgroundTasks(), 218 | ) -> CaseInfo: 219 | logger.info("start create_new_case") 220 | start_time = time.time() 221 | 222 | """ 223 | Create a new case for a specific user, store the uploaded PDFs, and process/ingest them. 
224 | """ 225 | if not files or not case_name or not user_id: 226 | raise HTTPException(status_code=400, detail="No files, case name, or user ID provided.") 227 | 228 | case_dir = Path(f"{settings.STORAGE_DIR}/{user_id}/{case_name}") 229 | case_dir.mkdir(parents=True, exist_ok=True) 230 | 231 | file_names = [] 232 | for uploaded_file in files: 233 | file_path = os.path.join(case_dir, uploaded_file.filename) 234 | with open(file_path, "wb") as f: 235 | f.write(uploaded_file.file.read()) 236 | file_names.append(uploaded_file.filename) 237 | 238 | case_info = CaseInfo( 239 | name=case_name, 240 | status="processing", 241 | number_of_PDFs=len(files), 242 | files=file_names, 243 | case_dir=case_dir, 244 | ) 245 | case_info.save() 246 | 247 | 248 | background_tasks.add_task(process_case, case_info=case_info, user_id=user_id) 249 | 250 | end_time = time.time() 251 | logger.info(f"done create_new_case, total time {end_time - start_time}") 252 | 253 | return case_info 254 | 255 | 256 | @app.get("/get_cases") 257 | def get_cases(user_id: str): 258 | logger.info("start get_cases") 259 | start_time = time.time() 260 | 261 | """ 262 | Return a list of all previously uploaded cases for a specific user with their metadata. 263 | """ 264 | user_storage_dir = os.path.join(settings.STORAGE_DIR, user_id) 265 | if not os.path.exists(user_storage_dir): 266 | return {"message": "No cases found.", "cases": []} 267 | 268 | cases = os.listdir(user_storage_dir) 269 | case_data = [] 270 | 271 | for case in cases: 272 | case_info_path = os.path.join(user_storage_dir, case, settings.CASE_INFO_FILENAME) 273 | if os.path.exists(case_info_path): 274 | with open(case_info_path, "r") as json_file: 275 | case_info = CaseInfo(**json.load(json_file)) 276 | case_data.append(case_info.dict()) 277 | 278 | # Add common cases 279 | common_cases_dir = os.path.join(settings.STORAGE_DIR, "common_cases") 280 | if os.path.exists(common_cases_dir): 281 | common_cases = os.listdir(common_cases_dir) 282 | for case in common_cases: 283 | case_info_path = os.path.join(common_cases_dir, case, settings.CASE_INFO_FILENAME) 284 | if os.path.exists(case_info_path): 285 | with open(case_info_path, "r") as json_file: 286 | case_info = CaseInfo(**json.load(json_file)) 287 | case_data.append(case_info.dict()) 288 | 289 | if not case_data: 290 | return {"message": "No case data found.", "cases": []} 291 | 292 | end_time = time.time() 293 | logger.info(f"done get_cases, total time {end_time - start_time}") 294 | 295 | return {"cases": case_data} 296 | 297 | 298 | @app.get("/get_case/{case_name}") 299 | def get_case(user_id: str, case_name: str) -> CaseInfo: 300 | case_info_path = os.path.join(settings.STORAGE_DIR, user_id, case_name, settings.CASE_INFO_FILENAME) 301 | if not os.path.exists(case_info_path): 302 | # Check common cases 303 | case_info_path = os.path.join(settings.STORAGE_DIR, "common_cases", case_name, settings.CASE_INFO_FILENAME) 304 | if not os.path.exists(case_info_path): 305 | raise HTTPException(status_code=404, detail="Case info not found.") 306 | 307 | with open(case_info_path, "r") as json_file: 308 | case_info = CaseInfo(**json.load(json_file)) 309 | return case_info 310 | 311 | @app.delete("/delete_case/{case_name}") 312 | def delete_case(user_id: str, case_name: str): 313 | logger.info("start delete_case") 314 | start_time = time.time() 315 | 316 | """ 317 | Delete a specific case for a specific user. 
318 | """ 319 | # Delete the case from storage 320 | case_dir = os.path.join(settings.STORAGE_DIR, user_id, case_name) 321 | if os.path.exists(case_dir): 322 | shutil.rmtree(case_dir) 323 | else: 324 | raise HTTPException(status_code=404, detail="Case not found in storage.") 325 | 326 | end_time = time.time() 327 | logger.info(f"done delete_case, total time {end_time - start_time}") 328 | 329 | return {"message": f"Case '{case_name}' has been deleted."} 330 | 331 | 332 | @app.get("/health") 333 | def health_check(): 334 | return {"status": "ok"} 335 | -------------------------------------------------------------------------------- /no-ocr-api/np_ocr/data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import tracemalloc 4 | from pathlib import Path 5 | 6 | from datasets import Dataset 7 | from pdf2image import convert_from_path 8 | from pypdf import PdfReader 9 | from tqdm import tqdm 10 | 11 | logger = logging.getLogger() 12 | 13 | 14 | def get_pdf_images(pdf_path): 15 | logger.info("start get_pdf_images") 16 | start_time = time.time() 17 | 18 | reader = PdfReader(pdf_path) 19 | page_texts = [] 20 | for page_number in range(len(reader.pages)): 21 | page = reader.pages[page_number] 22 | text = page.extract_text() 23 | page_texts.append(text) 24 | # Convert to PIL images 25 | images = convert_from_path( 26 | pdf_path, dpi=150, fmt="jpeg", jpegopt={"quality": 100, "progressive": True, "optimize": True} 27 | ) 28 | assert len(images) == len(page_texts) 29 | 30 | end_time = time.time() 31 | logger.info(f"done get_pdf_images, total time {end_time - start_time}") 32 | 33 | return images, page_texts 34 | 35 | 36 | def pdfs_to_hf_dataset(path_to_folder): 37 | logger.info("start pdfs_to_hf_dataset") 38 | start_time = time.time() 39 | 40 | tracemalloc.start() # Start tracing memory allocations 41 | 42 | data = [] 43 | global_index = 0 44 | 45 | folder_path = Path(path_to_folder) 46 | pdf_files = list(folder_path.glob("*.pdf")) 47 | for pdf_file in tqdm(pdf_files, desc="Processing PDFs"): 48 | images, page_texts = get_pdf_images(str(pdf_file)) 49 | 50 | for page_number, (image, text) in enumerate(zip(images, page_texts)): 51 | data.append( 52 | { 53 | "image": image, 54 | "index": global_index, 55 | "pdf_name": pdf_file.name, 56 | "pdf_page": page_number + 1, 57 | "page_text": text, 58 | } 59 | ) 60 | global_index += 1 61 | # Print memory usage after processing each image 62 | current, peak = tracemalloc.get_traced_memory() 63 | 64 | # Print memory usage after processing each PDF 65 | current, peak = tracemalloc.get_traced_memory() 66 | logger.info(f"PDF: Current memory usage is {current / 10**6}MB; Peak was {peak / 10**6}MB") 67 | 68 | current, peak = tracemalloc.get_traced_memory() 69 | logger.info(f"TOTAL: Current memory usage is {current / 10**6}MB; Peak was {peak / 10**6}MB") 70 | tracemalloc.stop() # Stop tracing memory allocations 71 | 72 | logger.info("Done processing") 73 | dataset = Dataset.from_list(data) 74 | logger.info("Done converting to dataset") 75 | 76 | end_time = time.time() 77 | logger.info(f"done pdfs_to_hf_dataset, total time {end_time - start_time}") 78 | 79 | return dataset 80 | -------------------------------------------------------------------------------- /no-ocr-api/np_ocr/search.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import io 3 | import json 4 | import logging 5 | import time 6 | from io import BytesIO 7 | from pathlib import 
Path 8 | from typing import List 9 | 10 | import PIL 11 | import requests 12 | from openai import OpenAI 13 | from pydantic import BaseModel 14 | import lancedb 15 | import numpy as np 16 | import pyarrow as pa 17 | from tqdm import tqdm 18 | 19 | logger = logging.getLogger() 20 | 21 | class ImageAnswer(BaseModel): 22 | answer: str 23 | 24 | class CaseInfo(BaseModel): 25 | name: str 26 | unique_name: str 27 | status: str 28 | number_of_PDFs: int 29 | files: List[str] 30 | case_dir: Path 31 | 32 | def save(self, case_info_filename: str): 33 | with open(self.case_dir / case_info_filename, "w") as json_file: 34 | json.dump(self.model_dump(), json_file, default=str) 35 | 36 | def update_status(self, new_status: str, case_info_filename: str): 37 | self.status = new_status 38 | self.save(case_info_filename) 39 | 40 | 41 | 42 | class ColPaliClient: 43 | def __init__(self, base_url: str, token: str): 44 | self.base_url = base_url 45 | self.headers = {"Authorization": f"Bearer {token}"} 46 | 47 | def query_text(self, query_text: str): 48 | response = requests.post(f"{self.base_url}/query", headers=self.headers, params={"query_text": query_text}) 49 | response.raise_for_status() 50 | return response.json() 51 | 52 | def process_image(self, image_path: str): 53 | with open(image_path, "rb") as image_file: 54 | files = {"image": image_file} 55 | response = requests.post(f"{self.base_url}/process_image", files=files, headers=self.headers) 56 | response.raise_for_status() 57 | return response.json() 58 | 59 | def process_pil_image(self, pil_image): 60 | buffered = io.BytesIO() 61 | pil_image.save(buffered, format="JPEG") 62 | files = {"image": buffered.getvalue()} 63 | response = requests.post(f"{self.base_url}/process_image", files=files, headers=self.headers) 64 | response.raise_for_status() 65 | return response.json() 66 | 67 | class SearchClient: 68 | def __init__(self, storage_dir: str, vector_size: int, base_url: str, token: str): 69 | self.storage_dir = storage_dir 70 | self.vector_size = vector_size 71 | self.colpali_client = ColPaliClient(base_url, token) 72 | 73 | def ingest(self, case_name: str, dataset, user_id: str): 74 | logger.info("start ingest") 75 | start_time = time.time() 76 | 77 | schema = pa.schema( 78 | [ 79 | pa.field("index", pa.int64()), 80 | pa.field("pdf_name", pa.string()), 81 | pa.field("pdf_page", pa.int64()), 82 | pa.field("vector", pa.list_(pa.list_(pa.float32(), self.vector_size))), 83 | ] 84 | ) 85 | lance_client = lancedb.connect(f"{self.storage_dir}/{user_id}/{case_name}") 86 | tbl = lance_client.create_table(case_name, schema=schema) 87 | 88 | # TODO: ingest in batches 89 | 90 | with tqdm(total=len(dataset), desc="Indexing Progress") as pbar: 91 | for i in range(len(dataset)): 92 | image = dataset[i]["image"] 93 | response = self.colpali_client.process_pil_image(image) 94 | image_embedding = response["embedding"] 95 | 96 | data = { 97 | "index": dataset[i]["index"], 98 | "pdf_name": dataset[i]["pdf_name"], 99 | "pdf_page": dataset[i]["pdf_page"], 100 | "vector": image_embedding, 101 | } 102 | 103 | try: 104 | tbl.add([data]) 105 | except Exception as e: 106 | logger.error(f"Error during upsert: {e}") 107 | continue 108 | pbar.update(1) 109 | 110 | tbl.create_index(metric="cosine") 111 | 112 | logger.info("Indexing complete!") 113 | end_time = time.time() 114 | logger.info(f"done ingest, total time {end_time - start_time}") 115 | 116 | def search_images_by_text(self, query_text, case_name: str, user_id: str,top_k: int): 117 | logger.info("start 
search_images_by_text") 118 | start_time = time.time() 119 | 120 | lance_client = lancedb.connect(f"{self.storage_dir}/{user_id}/{case_name}") 121 | tbl = lance_client.open_table(case_name) 122 | 123 | query_embedding = self.colpali_client.query_text(query_text) 124 | multivector_query = np.array(query_embedding["embedding"]) 125 | search_result = tbl.search(multivector_query).limit(top_k).select(["index", "pdf_name", "pdf_page"]).to_list() 126 | 127 | end_time = time.time() 128 | logger.info(f"done search_images_by_text, total time {end_time - start_time}") 129 | 130 | return search_result 131 | 132 | 133 | def call_vllm(image_data: PIL.Image.Image, user_query: str, base_url: str, api_key: str, model: str) -> ImageAnswer: 134 | logger.info("start call_vllm") 135 | start_time = time.time() 136 | 137 | model = "Qwen2-VL-7B-Instruct" 138 | 139 | prompt = f""" 140 | Based on the user's query: 141 | ### 142 | {user_query} 143 | ### 144 | 145 | and the provided image, determine if the image contains enough information to answer the query. 146 | If it does, provide the most accurate answer possible based on the image. 147 | If it does not, respond with the exact phrase "NA". 148 | 149 | Please return your response in valid JSON with the structure: 150 | {{ 151 | "answer": "Answer text or NA" 152 | }} 153 | """ 154 | 155 | print(prompt) 156 | buffered = BytesIO() 157 | max_size = (512, 512) 158 | image_data.thumbnail(max_size) 159 | image_data.save(buffered, format="JPEG") 160 | img_b64_str = base64.b64encode(buffered.getvalue()).decode("utf-8") 161 | 162 | client = OpenAI(base_url=base_url, api_key=api_key) 163 | completion = client.beta.chat.completions.parse( 164 | model=model, 165 | messages=[ 166 | { 167 | "role": "user", 168 | "content": [ 169 | {"type": "text", "text": prompt}, 170 | { 171 | "type": "image_url", 172 | "image_url": {"url": f"data:image/jpeg;base64,{img_b64_str}"}, 173 | }, 174 | ], 175 | } 176 | ], 177 | response_format=ImageAnswer, 178 | extra_body=dict(guided_decoding_backend="outlines"), 179 | ) 180 | message = completion.choices[0].message 181 | result = message.parsed 182 | 183 | end_time = time.time() 184 | logger.info(f"done call_vllm, total time {end_time - start_time}") 185 | 186 | return result 187 | -------------------------------------------------------------------------------- /no-ocr-api/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets==3.1.0 2 | pdf2image==1.16.3 3 | pypdf==5.0.1 4 | streamlit==1.40.1 5 | pydantic-settings==2.6.1 6 | openai==1.55.3 7 | fastapi[standard] 8 | diskcache 9 | ipython==8.31.0 10 | pytest==8.3.4 11 | pytest-cov==6.0.0 -------------------------------------------------------------------------------- /no-ocr-api/tests/lance_vs_qdrant.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import lancedb 3 | import numpy as np 4 | import pyarrow as pa 5 | import base64 6 | import io 7 | import json 8 | import logging 9 | import time 10 | from io import BytesIO 11 | from pathlib import Path 12 | from typing import List 13 | 14 | import PIL 15 | import PIL.Image 16 | from PIL import Image 17 | import requests 18 | from openai import OpenAI 19 | from pydantic import BaseModel 20 | from qdrant_client import QdrantClient, models 21 | from tqdm import tqdm 22 | 23 | class CustomRailwayLogFormatter(logging.Formatter): 24 | def format(self, record): 25 | log_record = { 26 | "time": self.formatTime(record), 27 | "level": 
record.levelname, 28 | "message": record.getMessage() 29 | } 30 | return json.dumps(log_record) 31 | 32 | def get_logger(): 33 | logger = logging.getLogger() 34 | logger.setLevel(logging.INFO) 35 | handler = logging.StreamHandler() 36 | for handler in logger.handlers[:]: 37 | logger.removeHandler(handler) 38 | formatter = CustomRailwayLogFormatter() 39 | handler.setFormatter(formatter) 40 | logger.addHandler(handler) 41 | return logger 42 | 43 | logger = get_logger() 44 | 45 | 46 | class ColPaliClient: 47 | def __init__(self, base_url: str = "http://localhost:8000", token: str = "super-secret-token"): 48 | self.base_url = base_url 49 | self.headers = {"Authorization": f"Bearer {token}"} 50 | 51 | def query_text(self, query_text: str): 52 | response = requests.post(f"{self.base_url}/query", headers=self.headers, params={"query_text": query_text}) 53 | response.raise_for_status() 54 | return response.json() 55 | 56 | def process_image(self, image_path: str): 57 | with open(image_path, "rb") as image_file: 58 | files = {"image": image_file} 59 | response = requests.post(f"{self.base_url}/process_image", files=files, headers=self.headers) 60 | response.raise_for_status() 61 | return response.json() 62 | 63 | def process_pil_image(self, pil_image): 64 | buffered = io.BytesIO() 65 | pil_image.save(buffered, format="JPEG") 66 | files = {"image": buffered.getvalue()} 67 | response = requests.post(f"{self.base_url}/process_image", files=files, headers=self.headers) 68 | response.raise_for_status() 69 | return response.json() 70 | 71 | 72 | class IngestClientQdrant: 73 | def __init__(self, qdrant_uri: str, port: int, https: bool, index_threshold: int, vector_size: int, quantile: float, top_k: int, base_url: str, token: str): 74 | self.qdrant_client = QdrantClient(qdrant_uri, port=port, https=https) 75 | self.colpali_client = ColPaliClient(base_url, token) 76 | self.index_threshold = index_threshold 77 | self.vector_size = vector_size 78 | self.quantile = quantile 79 | self.top_k = top_k 80 | 81 | def ingest(self, case_name, dataset): 82 | logger.info("start ingest") 83 | start_time = time.time() 84 | 85 | self.qdrant_client.create_collection( 86 | collection_name=case_name, 87 | on_disk_payload=True, 88 | optimizers_config=models.OptimizersConfigDiff(indexing_threshold=self.index_threshold), 89 | vectors_config=models.VectorParams( 90 | size=self.vector_size, 91 | distance=models.Distance.COSINE, 92 | multivector_config=models.MultiVectorConfig(comparator=models.MultiVectorComparator.MAX_SIM), 93 | quantization_config=models.ScalarQuantization( 94 | scalar=models.ScalarQuantizationConfig( 95 | type=models.ScalarType.INT8, 96 | quantile=self.quantile, 97 | always_ram=True, 98 | ), 99 | ), 100 | ), 101 | ) 102 | 103 | # Use tqdm to create a progress bar 104 | with tqdm(total=len(dataset), desc="Indexing Progress") as pbar: 105 | for i in range(len(dataset)): 106 | # The images are already PIL Image objects, so we can use them directly 107 | image = dataset[i]["image"] 108 | 109 | # Process and encode image using ColPaliClient 110 | response = self.colpali_client.process_pil_image(image) 111 | image_embedding = response["embedding"] 112 | 113 | # Prepare point for Qdrant 114 | point = models.PointStruct( 115 | id=i, # we just use the index as the ID 116 | vector=image_embedding, # This is now a list of vectors 117 | payload={ 118 | "index": dataset[i]["index"], 119 | "pdf_name": dataset[i]["pdf_name"], 120 | "pdf_page": dataset[i]["pdf_page"], 121 | }, # can also add other metadata/data 122 | ) 123 | 
124 | try: 125 | self.qdrant_client.upsert( 126 | collection_name=case_name, 127 | points=[point], 128 | wait=False, 129 | ) 130 | except Exception as e: 131 | logger.error(f"Error during upsert: {e}") 132 | continue 133 | pbar.update(1) 134 | 135 | logger.info("Indexing complete!") 136 | end_time = time.time() 137 | logger.info(f"done ingest, total time {end_time - start_time}") 138 | 139 | class IngestClientLance: 140 | def __init__(self, lance_uri: str, vector_size: int, base_url: str = "http://localhost:8000", token: str = "super-secret-token"): 141 | self.lance_client = lancedb.connect(lance_uri) 142 | self.vector_size = vector_size 143 | self.colpali_client = ColPaliClient(base_url, token) 144 | 145 | def ingest(self, case_name, dataset): 146 | logger.info("start ingest") 147 | start_time = time.time() 148 | 149 | 150 | schema = pa.schema( 151 | [ 152 | pa.field("index", pa.int64()), 153 | pa.field("pdf_name", pa.string()), 154 | pa.field("pdf_page", pa.int64()), 155 | pa.field("vector", pa.list_(pa.list_(pa.float32(), self.vector_size))), 156 | ] 157 | ) 158 | 159 | tbl = self.lance_client.create_table(case_name, schema=schema) 160 | with tqdm(total=len(dataset), desc="Indexing Progress") as pbar: 161 | for i in range(len(dataset)): 162 | image = dataset[i]["image"] 163 | response = self.colpali_client.process_pil_image(image) 164 | image_embedding = response["embedding"] 165 | 166 | data = { 167 | "index": dataset[i]["index"], 168 | "pdf_name": dataset[i]["pdf_name"], 169 | "pdf_page": dataset[i]["pdf_page"], 170 | "vector": image_embedding, 171 | } 172 | 173 | try: 174 | tbl.add([data]) 175 | except Exception as e: 176 | logger.error(f"Error during upsert: {e}") 177 | continue 178 | pbar.update(1) 179 | 180 | tbl.create_index(metric="cosine") 181 | 182 | logger.info("Indexing complete!") 183 | end_time = time.time() 184 | logger.info(f"done ingest, total time {end_time - start_time}") 185 | 186 | def search_images_by_text(self, query_text, case_name: str, top_k: int): 187 | logger.info("start search_images_by_text") 188 | start_time = time.time() 189 | 190 | query_embedding = self.colpali_client.query_text(query_text) 191 | multivector_query = np.array(query_embedding["embedding"]) 192 | tbl = self.lance_client.open_table(case_name) 193 | search_result = tbl.search(multivector_query).limit(top_k).select(["index", "pdf_name", "pdf_page"]).to_list() 194 | 195 | end_time = time.time() 196 | logger.info(f"done search_images_by_text, total time {end_time - start_time}") 197 | 198 | return search_result 199 | 200 | class SearchClientQdrant: 201 | def __init__(self, qdrant_uri: str, port: int, https: bool, top_k: int, base_url: str, token: str): 202 | self.qdrant_client = QdrantClient(qdrant_uri, port=port, https=https) 203 | self.colpali_client = ColPaliClient(base_url=base_url, token=token) 204 | self.top_k = top_k 205 | 206 | def search_images_by_text(self, query_text, case_name: str, top_k: int): 207 | logger.info("start search_images_by_text") 208 | start_time = time.time() 209 | 210 | # Use ColPaliClient to query text and get the embedding 211 | query_embedding = self.colpali_client.query_text(query_text) 212 | 213 | # Extract the embedding from the response 214 | multivector_query = query_embedding["embedding"] 215 | 216 | # Search in Qdrant 217 | search_result = self.qdrant_client.query_points(collection_name=case_name, query=multivector_query, limit=top_k) 218 | 219 | end_time = time.time() 220 | logger.info(f"done search_images_by_text, total time {end_time - start_time}") 221 
| 222 | return search_result 223 | 224 | 225 | def benchmark_ingest_clients(): 226 | # Create mock data 227 | mock_data = [ 228 | { 229 | "index": i, 230 | "pdf_name": f"document_{i}.pdf", 231 | "pdf_page": i % 10, 232 | "image": Image.fromarray(np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)) 233 | } 234 | for i in range(1024) 235 | ] 236 | 237 | # Initialize clients 238 | qdrant_client = IngestClientQdrant( 239 | qdrant_uri="http://localhost", 240 | port=6333, 241 | https=False, 242 | index_threshold=100, 243 | vector_size=128, 244 | quantile=0.99, 245 | top_k=10, 246 | base_url="http://localhost:8000", 247 | token="super-secret-token" 248 | ) 249 | 250 | lance_client = IngestClientLance( 251 | lance_uri="lance-db-data/qwe123", 252 | vector_size=128, 253 | base_url="http://localhost:8000", 254 | token="super-secret-token" 255 | ) 256 | 257 | # Benchmark Qdrant ingestion 258 | # start_time = time.time() 259 | # qdrant_client.ingest("test_case_qdrant", mock_data) 260 | # qdrant_duration = time.time() - start_time 261 | # logger.info(f"Qdrant ingestion time: {qdrant_duration} seconds") 262 | 263 | # # Benchmark Lance ingestion 264 | # start_time = time.time() 265 | # lance_client.ingest("test_case_lance", mock_data) 266 | # lance_duration = time.time() - start_time 267 | # logger.info(f"Lance ingestion time: {lance_duration} seconds") 268 | 269 | # Initialize search clients 270 | qdrant_search_client = SearchClientQdrant( 271 | qdrant_uri="http://localhost", 272 | port=6333, 273 | https=False, 274 | top_k=10, 275 | base_url="http://localhost:8000", 276 | token="super-secret-token" 277 | ) 278 | 279 | # Benchmark Qdrant search 280 | start_time = time.time() 281 | qdrant_search_client.search_images_by_text("example query", "test_case_qdrant", 10) 282 | qdrant_search_duration = time.time() - start_time 283 | logger.info(f"Qdrant search time: {qdrant_search_duration} seconds") 284 | 285 | # Benchmark Lance search 286 | start_time = time.time() 287 | lance_client.search_images_by_text("example query", "test_case_lance", 10) 288 | lance_search_duration = time.time() - start_time 289 | logger.info(f"Lance search time: {lance_search_duration} seconds") 290 | 291 | return { 292 | # "qdrant_ingestion_duration": qdrant_duration, 293 | # "lance_ingestion_duration": lance_duration, 294 | "qdrant_search_duration": qdrant_search_duration, 295 | "lance_search_duration": lance_search_duration 296 | } 297 | 298 | if __name__ == "__main__": 299 | benchmark_results = benchmark_ingest_clients() 300 | print(benchmark_results) 301 | -------------------------------------------------------------------------------- /no-ocr-api/tests/mock_colpali.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from fastapi import APIRouter, Depends, FastAPI, HTTPException, Security, UploadFile 3 | from fastapi.middleware.cors import CORSMiddleware 4 | from fastapi.security import HTTPBearer 5 | 6 | app = FastAPI( 7 | title="Mock ColPali Server", 8 | description="Mock server for ColPali model with FastAPI", 9 | version="0.0.1", 10 | docs_url="/docs", 11 | ) 12 | 13 | # Security: CORS middleware for external requests 14 | http_bearer = HTTPBearer( 15 | scheme_name="Bearer Token", 16 | description="See code for authentication details.", 17 | ) 18 | app.add_middleware( 19 | CORSMiddleware, 20 | allow_origins=["*"], 21 | allow_credentials=True, 22 | allow_methods=["*"], 23 | allow_headers=["*"], 24 | ) 25 | 26 | # Security: inject dependency on authed routes 27 | 
TOKEN = "super-secret-token" 28 | 29 | async def is_authenticated(api_key: str = Security(http_bearer)): 30 | if api_key.credentials != TOKEN: 31 | raise HTTPException( 32 | status_code=401, 33 | detail="Invalid authentication credentials", 34 | ) 35 | return {"username": "authenticated_user"} 36 | 37 | router = APIRouter(dependencies=[Depends(is_authenticated)]) 38 | 39 | # Define a simple endpoint to process text queries 40 | @router.post("/query") 41 | async def query_model(query_text: str): 42 | # Mock response: generate a random embedding with shape (3, 128) 43 | mock_embedding = np.random.rand(3, 128).tolist() 44 | return {"embedding": mock_embedding} 45 | 46 | @router.post("/process_image") 47 | async def process_image(image: UploadFile): 48 | # Mock response: generate a random embedding with shape (1030, 128) 49 | mock_embedding = np.random.rand(1030, 128).tolist() 50 | return {"embedding": mock_embedding} 51 | 52 | # Add authed router to our FastAPI app 53 | app.include_router(router) 54 | -------------------------------------------------------------------------------- /no-ocr-api/tests/test_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | import pytest 5 | from fastapi.testclient import TestClient 6 | from np_ocr.api import app 7 | 8 | 9 | @pytest.fixture 10 | def client(): 11 | """ 12 | A pytest fixture that creates a TestClient for our FastAPI 'app'. 13 | It also cleans up any residual storage after tests run. 14 | """ 15 | 16 | with TestClient(app) as c: 17 | yield c 18 | 19 | if os.path.exists("storage"): 20 | shutil.rmtree("storage") 21 | 22 | def test_health_check(client): 23 | """ 24 | Test the health check endpoint to ensure the API is running. 25 | """ 26 | response = client.get("/health") 27 | assert response.status_code == 200 28 | assert response.json() == {"status": "ok"} 29 | 30 | def test_end2end(client): 31 | # Step 1: Create a case with a document 32 | import uuid 33 | 34 | case_name = str(uuid.uuid4()) 35 | user_id = str(uuid.uuid4()) 36 | print(f"Creating case '{case_name}' for user '{user_id}'") 37 | files = {"files": ("InfraRed-Report.pdf", open("data/InfraRed-Report.pdf", "rb"), "application/pdf")} 38 | response = client.post("/create_case", data={"user_id": user_id, "case_name": case_name}, files=files) 39 | print(f"Response status code for create_case: {response.status_code}") 40 | assert response.status_code == 200 41 | case_info = response.json() 42 | print(f"Case info after creation: {case_info}") 43 | assert case_info["status"] == "processing" 44 | 45 | # Step 2: Poll the get_case endpoint until the status is 'done' 46 | import time 47 | max_retries = 10 48 | for attempt in range(max_retries): 49 | print(f"Polling get_case, attempt {attempt + 1}") 50 | response = client.get(f"/get_case/{case_name}", params={"user_id": user_id}) 51 | print(f"Response status code for get_case: {response.status_code}") 52 | assert response.status_code == 200 53 | case_info = response.json() 54 | print(f"Case info during polling: {case_info}") 55 | if case_info["status"] == "done": 56 | print("Case processing completed.") 57 | break 58 | time.sleep(1) # Wait for a second before retrying 59 | else: 60 | pytest.fail("Case processing did not complete in time.") 61 | 62 | # Step 3: Call the search endpoint 63 | print(f"Calling search endpoint for case '{case_name}'") 64 | response = client.post("/search", data={"user_query": "Margin between the SaaS and Infra companies?", "user_id": user_id, 
"case_name": case_name}) 65 | print(f"Response status code for search: {response.status_code}") 66 | assert response.status_code == 200 67 | search_results = response.json() 68 | assert "search_results" in search_results 69 | assert len(search_results["search_results"]) > 0 70 | 71 | # Check the output schema 72 | for result in search_results["search_results"]: 73 | assert "score" in result 74 | assert "pdf_name" in result 75 | assert "pdf_page" in result 76 | assert isinstance(result["score"], float) 77 | assert isinstance(result["pdf_name"], str) 78 | assert isinstance(result["pdf_page"], int) 79 | 80 | # Step 4: Call the vllm endpoint with random pages from search results 81 | import random 82 | random_result = random.choice(search_results["search_results"]) 83 | pdf_name = random_result["pdf_name"] 84 | pdf_page = random_result["pdf_page"] 85 | print(f"Calling vllm endpoint for PDF '{pdf_name}' page {pdf_page}") 86 | response = client.post("/vllm_call", data={ 87 | "user_query": "AI", 88 | "user_id": user_id, 89 | "case_name": case_name, 90 | "pdf_name": pdf_name, 91 | "pdf_page": pdf_page 92 | }) 93 | print(f"Response status code for vllm_call: {response.status_code}") 94 | assert response.status_code == 200 95 | vllm_result = response.json() 96 | assert "answer" in vllm_result 97 | print(f"VLLM result: {vllm_result['answer']}") 98 | 99 | # Step 5: Delete the case 100 | print(f"Deleting case '{case_name}' for user '{user_id}'") 101 | response = client.delete(f"/delete_case/{case_name}", params={"user_id": user_id}) 102 | print(f"Response status code for delete_case: {response.status_code}") 103 | assert response.status_code == 200 104 | delete_result = response.json() 105 | assert "message" in delete_result 106 | print(f"Delete result: {delete_result['message']}") 107 | -------------------------------------------------------------------------------- /no-ocr-llms/llm_serving.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | vllm_image = modal.Image.debian_slim(python_version="3.12").pip_install( 4 | "vllm==0.6.3post1", "fastapi[standard]==0.115.4" 5 | ) 6 | 7 | 8 | MODELS_DIR = "/models" 9 | MODEL_NAME = "Qwen/Qwen2-VL-7B-Instruct" 10 | 11 | 12 | try: 13 | volume = modal.Volume.lookup("models", create_if_missing=False) 14 | except modal.exception.NotFoundError: 15 | raise Exception("Download models first with modal run download_llama.py") 16 | 17 | 18 | app = modal.App("qwen2-vllm") 19 | 20 | N_GPU = 1 # tip: for best results, first upgrade to more powerful GPUs, and only then increase GPU count 21 | TOKEN = "super-secret-token" # auth token. 
for production use, replace with a modal.Secret 22 | 23 | MINUTES = 60 # seconds 24 | HOURS = 60 * MINUTES 25 | 26 | 27 | @app.function( 28 | image=vllm_image, 29 | gpu=modal.gpu.A100(count=N_GPU), 30 | keep_warm=0, 31 | container_idle_timeout=1 * MINUTES, 32 | timeout=24 * HOURS, 33 | allow_concurrent_inputs=1000, 34 | volumes={MODELS_DIR: volume}, 35 | ) 36 | @modal.asgi_app() 37 | def serve(): 38 | import fastapi 39 | import vllm.entrypoints.openai.api_server as api_server 40 | from vllm.engine.arg_utils import AsyncEngineArgs 41 | from vllm.engine.async_llm_engine import AsyncLLMEngine 42 | from vllm.entrypoints.logger import RequestLogger 43 | from vllm.entrypoints.openai.serving_chat import OpenAIServingChat 44 | from vllm.entrypoints.openai.serving_completion import ( 45 | OpenAIServingCompletion, 46 | ) 47 | from vllm.entrypoints.openai.serving_engine import BaseModelPath 48 | from vllm.usage.usage_lib import UsageContext 49 | 50 | volume.reload() # ensure we have the latest version of the weights 51 | 52 | # create a fastAPI app that uses vLLM's OpenAI-compatible router 53 | web_app = fastapi.FastAPI( 54 | title=f"OpenAI-compatible {MODEL_NAME} server", 55 | description="Run an OpenAI-compatible LLM server with vLLM on modal.com 🚀", 56 | version="0.0.1", 57 | docs_url="/docs", 58 | ) 59 | 60 | # security: CORS middleware for external requests 61 | http_bearer = fastapi.security.HTTPBearer( 62 | scheme_name="Bearer Token", 63 | description="See code for authentication details.", 64 | ) 65 | web_app.add_middleware( 66 | fastapi.middleware.cors.CORSMiddleware, 67 | allow_origins=["*"], 68 | allow_credentials=True, 69 | allow_methods=["*"], 70 | allow_headers=["*"], 71 | ) 72 | 73 | # security: inject dependency on authed routes 74 | async def is_authenticated(api_key: str = fastapi.Security(http_bearer)): 75 | if api_key.credentials != TOKEN: 76 | raise fastapi.HTTPException( 77 | status_code=fastapi.status.HTTP_401_UNAUTHORIZED, 78 | detail="Invalid authentication credentials", 79 | ) 80 | return {"username": "authenticated_user"} 81 | 82 | router = fastapi.APIRouter(dependencies=[fastapi.Depends(is_authenticated)]) 83 | 84 | # wrap vllm's router in auth router 85 | router.include_router(api_server.router) 86 | # add authed vllm to our fastAPI app 87 | web_app.include_router(router) 88 | 89 | engine_args = AsyncEngineArgs( 90 | model=MODELS_DIR + "/" + MODEL_NAME, 91 | tensor_parallel_size=N_GPU, 92 | gpu_memory_utilization=0.90, 93 | max_model_len=8096, 94 | enforce_eager=False, # capture the graph for faster inference, but slower cold starts (30s > 20s) 95 | ) 96 | 97 | engine = AsyncLLMEngine.from_engine_args(engine_args, usage_context=UsageContext.OPENAI_API_SERVER) 98 | 99 | model_config = get_model_config(engine) 100 | 101 | request_logger = RequestLogger(max_log_len=2048) 102 | 103 | base_model_paths = [BaseModelPath(name=MODEL_NAME.split("/")[1], model_path=MODEL_NAME)] 104 | 105 | api_server.chat = lambda s: OpenAIServingChat( 106 | engine, 107 | model_config=model_config, 108 | base_model_paths=base_model_paths, 109 | chat_template=None, 110 | response_role="assistant", 111 | lora_modules=[], 112 | prompt_adapters=[], 113 | request_logger=request_logger, 114 | ) 115 | api_server.completion = lambda s: OpenAIServingCompletion( 116 | engine, 117 | model_config=model_config, 118 | base_model_paths=base_model_paths, 119 | lora_modules=[], 120 | prompt_adapters=[], 121 | request_logger=request_logger, 122 | ) 123 | 124 | return web_app 125 | 126 | 127 | def 
get_model_config(engine): 128 | import asyncio 129 | 130 | try: # adapted from vLLM source -- https://github.com/vllm-project/vllm/blob/507ef787d85dec24490069ffceacbd6b161f4f72/vllm/entrypoints/openai/api_server.py#L235C1-L247C1 131 | event_loop = asyncio.get_running_loop() 132 | except RuntimeError: 133 | event_loop = None 134 | 135 | if event_loop is not None and event_loop.is_running(): 136 | # If the current is instanced by Ray Serve, 137 | # there is already a running event loop 138 | model_config = event_loop.run_until_complete(engine.get_model_config()) 139 | else: 140 | # When using single vLLM without engine_use_ray 141 | model_config = asyncio.run(engine.get_model_config()) 142 | 143 | return model_config 144 | -------------------------------------------------------------------------------- /no-ocr-llms/llm_serving_colpali.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | vllm_image = ( 4 | modal.Image.debian_slim(python_version="3.12") 5 | .pip_install("vllm==0.6.3post1", "fastapi[standard]==0.115.4") 6 | .pip_install("colpali-engine") 7 | ) 8 | 9 | MODELS_DIR = "/models" 10 | MODEL_NAME = "vidore/colqwen2-v1.0-merged" 11 | 12 | 13 | try: 14 | volume = modal.Volume.lookup("models", create_if_missing=False) 15 | except modal.exception.NotFoundError: 16 | raise Exception("Download models first with modal run download_llama.py") 17 | 18 | 19 | app = modal.App("colpali-embedding") 20 | 21 | N_GPU = 1 22 | TOKEN = "super-secret-token" 23 | 24 | MINUTES = 60 # seconds 25 | HOURS = 60 * MINUTES 26 | 27 | 28 | @app.function( 29 | image=vllm_image, 30 | gpu=modal.gpu.A100(count=N_GPU), 31 | keep_warm=0, 32 | container_idle_timeout=1 * MINUTES, 33 | timeout=24 * HOURS, 34 | allow_concurrent_inputs=1000, 35 | volumes={MODELS_DIR: volume}, 36 | ) 37 | @modal.asgi_app() 38 | def serve(): 39 | import fastapi 40 | import torch 41 | from colpali_engine.models import ColQwen2, ColQwen2Processor 42 | from fastapi import APIRouter, Depends, HTTPException, Security 43 | from fastapi.middleware.cors import CORSMiddleware 44 | from fastapi.security import HTTPBearer 45 | 46 | volume.reload() # ensure we have the latest version of the weights 47 | 48 | # create a fastAPI app for serving the ColPali model 49 | web_app = fastapi.FastAPI( 50 | title=f"ColPali {MODEL_NAME} server", 51 | description="Run a ColPali model server with fastAPI on modal.com 🚀", 52 | version="0.0.1", 53 | docs_url="/docs", 54 | ) 55 | 56 | # security: CORS middleware for external requests 57 | http_bearer = HTTPBearer( 58 | scheme_name="Bearer Token", 59 | description="See code for authentication details.", 60 | ) 61 | web_app.add_middleware( 62 | CORSMiddleware, 63 | allow_origins=["*"], 64 | allow_credentials=True, 65 | allow_methods=["*"], 66 | allow_headers=["*"], 67 | ) 68 | 69 | # security: inject dependency on authed routes 70 | async def is_authenticated(api_key: str = Security(http_bearer)): 71 | if api_key.credentials != TOKEN: 72 | raise HTTPException( 73 | status_code=fastapi.status.HTTP_401_UNAUTHORIZED, 74 | detail="Invalid authentication credentials", 75 | ) 76 | return {"username": "authenticated_user"} 77 | 78 | router = APIRouter(dependencies=[Depends(is_authenticated)]) 79 | 80 | # Define the model and processor 81 | model_name = "/models/vidore/colqwen2-v1.0-merged" 82 | colpali_model = ColQwen2.from_pretrained( 83 | model_name, 84 | torch_dtype=torch.bfloat16, 85 | device_map="cuda:0", 86 | ).eval() 87 | 88 | colpali_processor = 
ColQwen2Processor.from_pretrained(model_name) 89 | 90 | # Define a simple endpoint to process text queries 91 | @router.post("/query") 92 | async def query_model(query_text: str): 93 | with torch.no_grad(): 94 | batch_query = colpali_processor.process_queries([query_text]).to(colpali_model.device) 95 | query_embedding = colpali_model(**batch_query) 96 | return {"embedding": query_embedding[0].cpu().float().numpy().tolist()} 97 | 98 | @router.post("/process_image") 99 | async def process_image(image: fastapi.UploadFile): 100 | from PIL import Image 101 | 102 | pil_image = Image.open(image.file) 103 | with torch.no_grad(): 104 | batch_image = colpali_processor.process_images([pil_image]).to(colpali_model.device) 105 | image_embedding = colpali_model(**batch_image) 106 | return {"embedding": image_embedding[0].cpu().float().numpy().tolist()} 107 | 108 | # add authed router to our fastAPI app 109 | web_app.include_router(router) 110 | 111 | return web_app 112 | -------------------------------------------------------------------------------- /no-ocr-llms/llm_serving_load_models.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | MODELS_DIR = "/models" 4 | 5 | DEFAULT_NAME = "Qwen/Qwen2.5-7B-Instruct" 6 | DEFAULT_REVISION = "bb46c15ee4bb56c5b63245ef50fd7637234d6f75" 7 | 8 | 9 | volume = modal.Volume.from_name("models", create_if_missing=True) 10 | 11 | image = ( 12 | modal.Image.debian_slim(python_version="3.10") 13 | .pip_install( 14 | [ 15 | "huggingface_hub", 16 | "hf-transfer", 17 | ] 18 | ) 19 | .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) 20 | ) 21 | 22 | 23 | MINUTES = 60 24 | HOURS = 60 * MINUTES 25 | 26 | 27 | app = modal.App(image=image, secrets=[modal.Secret.from_name("huggingface-secret")]) 28 | 29 | 30 | @app.function(volumes={MODELS_DIR: volume}, timeout=4 * HOURS) 31 | def download_model(model_name, model_revision, force_download=False): 32 | from huggingface_hub import snapshot_download 33 | 34 | volume.reload() 35 | 36 | snapshot_download( 37 | model_name, 38 | local_dir=MODELS_DIR + "/" + model_name, 39 | ignore_patterns=[ 40 | "*.pt", 41 | "*.bin", 42 | "*.pth", 43 | "original/*", 44 | ], # Ensure safetensors 45 | revision=model_revision, 46 | force_download=force_download, 47 | ) 48 | 49 | volume.commit() 50 | 51 | 52 | @app.local_entrypoint() 53 | def main( 54 | model_name: str = DEFAULT_NAME, 55 | model_revision: str = DEFAULT_REVISION, 56 | force_download: bool = False, 57 | ): 58 | download_model.remote(model_name, model_revision, force_download) 59 | -------------------------------------------------------------------------------- /no-ocr-ui/.bolt/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "template": "bolt-vite-react-ts" 3 | } 4 | -------------------------------------------------------------------------------- /no-ocr-ui/.bolt/prompt: -------------------------------------------------------------------------------- 1 | For all designs I ask you to make, have them be beautiful, not cookie cutter. Make webpages that are fully featured and worthy for production. 2 | 3 | By default, this template supports JSX syntax with Tailwind CSS classes, React hooks, and Lucide React for icons. Do not install other packages for UI themes, icons, etc unless absolutely necessary or I request them. 4 | 5 | Use icons from lucide-react for logos. 6 | 7 | Use stock photos from unsplash where appropriate, only valid URLs you know exist. 
Do not download the images, only link to them in image tags. 8 | 9 | -------------------------------------------------------------------------------- /no-ocr-ui/.env.example: -------------------------------------------------------------------------------- 1 | VITE_SUPABASE_URL="" 2 | VITE_SUPABASE_ANON_KEY="" 3 | VITE_REACT_APP_API_URI="" 4 | -------------------------------------------------------------------------------- /no-ocr-ui/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /no-ocr-ui/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use a single node image 2 | FROM node:18 3 | 4 | WORKDIR /app 5 | 6 | # Copy package.json and package-lock.json 7 | COPY package.json package-lock.json ./ 8 | 9 | # Install dependencies 10 | RUN npm install 11 | 12 | # Copy the rest of the application code 13 | COPY . . 14 | 15 | # Set build-time environment variables 16 | ARG VITE_SUPABASE_URL 17 | ARG VITE_SUPABASE_ANON_KEY 18 | ARG VITE_REACT_APP_API_URI 19 | 20 | # Build the application 21 | RUN VITE_SUPABASE_URL=$VITE_SUPABASE_URL VITE_SUPABASE_ANON_KEY=$VITE_SUPABASE_ANON_KEY VITE_REACT_APP_API_URI=$VITE_REACT_APP_API_URI npm run build 22 | 23 | # Install serve globally 24 | RUN npm install -g serve 25 | 26 | EXPOSE 5173 27 | 28 | # Start the application using serve 29 | CMD ["serve", "-s", "./dist", "-l", "5173"] -------------------------------------------------------------------------------- /no-ocr-ui/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from '@eslint/js'; 2 | import globals from 'globals'; 3 | import reactHooks from 'eslint-plugin-react-hooks'; 4 | import reactRefresh from 'eslint-plugin-react-refresh'; 5 | import tseslint from 'typescript-eslint'; 6 | 7 | export default tseslint.config( 8 | { ignores: ['dist'] }, 9 | { 10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], 11 | files: ['**/*.{ts,tsx}'], 12 | languageOptions: { 13 | ecmaVersion: 2020, 14 | globals: globals.browser, 15 | }, 16 | plugins: { 17 | 'react-hooks': reactHooks, 18 | 'react-refresh': reactRefresh, 19 | }, 20 | rules: { 21 | ...reactHooks.configs.recommended.rules, 22 | 'react-refresh/only-export-components': [ 23 | 'warn', 24 | { allowConstantExport: true }, 25 | ], 26 | }, 27 | } 28 | ); 29 | -------------------------------------------------------------------------------- /no-ocr-ui/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | NoOCR 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /no-ocr-ui/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "vite-react-typescript-starter", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "vite build", 9 | "lint": "eslint .", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "@supabase/supabase-js": "^2.39.7", 14 | "lucide-react": "^0.263.1", 15 | "react": "^18.3.1", 16 | "react-dom": "^18.3.1", 17 | "react-router-dom": "^6.22.3", 18 | "zustand": "^4.5.2" 19 | }, 20 | "devDependencies": { 21 | "@eslint/js": "^9.9.1", 22 | "@types/react": "^18.3.5", 23 | "@types/react-dom": "^18.3.0", 24 | "@vitejs/plugin-react": "^4.3.1", 25 | "autoprefixer": "^10.4.18", 26 | "eslint": "^9.9.1", 27 | "eslint-plugin-react-hooks": "^5.1.0-rc.0", 28 | "eslint-plugin-react-refresh": "^0.4.11", 29 | "globals": "^15.9.0", 30 | "postcss": "^8.4.35", 31 | "tailwindcss": "^3.4.1", 32 | "typescript": "^5.5.3", 33 | "typescript-eslint": "^8.3.0", 34 | "vite": "^5.4.2" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /no-ocr-ui/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /no-ocr-ui/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect } from 'react'; 2 | import { BrowserRouter as Router, Routes, Route, Navigate } from 'react-router-dom'; 3 | import { initializeAuth } from './stores/authStore'; 4 | import Navbar from './components/layout/Navbar'; 5 | import { LoginForm } from './components/auth/LoginForm'; 6 | import { RegisterForm } from './components/auth/RegisterForm'; 7 | import CreateCase from './components/CreateCase'; 8 | import Cases from './components/Case'; 9 | import Search from './components/Search'; 10 | import About from './components/About'; 11 | 12 | export default function App() { 13 | useEffect(() => { 14 | initializeAuth(); 15 | }, []); 16 | 17 | return ( 18 | 19 |
20 | 21 |
22 | 23 | {/* Public routes (no authentication required) */} 24 | } /> 25 | } /> 26 | } /> 27 | } /> 28 | } /> 29 | 30 | {/* Auth pages */} 31 | } /> 32 | } /> 33 | 34 | {/* Fallback for uncaught routes */} 35 | } /> 36 | 37 |
38 |
39 |
40 | ); 41 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/About.tsx: -------------------------------------------------------------------------------- 1 | import { Features } from './about/Features'; 2 | import { Hero } from './about/Hero'; 3 | import { HowItWorks } from './about/HowItWorks'; 4 | 5 | export default function About() { 6 | return ( 7 |
8 | 9 | 10 | 11 |
12 | ); 13 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/Case.tsx: -------------------------------------------------------------------------------- 1 | import { CaseList } from './collections/CaseList'; 2 | 3 | export default function Cases() { 4 | return ( 5 |
6 |
7 |
8 |

Cases

9 |

10 | Manage your PDF cases and their contents 11 |

12 |
13 |
14 | 15 |
16 | 17 |
18 |
19 | ); 20 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/CreateCase.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import { useAuthStore } from '../stores/authStore'; 3 | import { noOcrApiUrl } from '../config/api'; 4 | import { useNavigate } from 'react-router-dom'; 5 | 6 | export default function CreateCase() { 7 | const { user } = useAuthStore(); 8 | const [caseName, setCaseName] = useState(''); 9 | const [files, setFiles] = useState(null); 10 | const [isUploading, setIsUploading] = useState(false); 11 | const [uploadProgress, setUploadProgress] = useState(0); 12 | const [apiMessage, setApiMessage] = useState(null); 13 | const navigate = useNavigate(); 14 | 15 | const handleSubmit = async (e: React.FormEvent) => { 16 | e.preventDefault(); 17 | 18 | if (!user) { 19 | setApiMessage('You must be logged in to create a case.'); 20 | return; 21 | } 22 | 23 | if (!files || !caseName) return; 24 | setIsUploading(true); 25 | 26 | const interval = setInterval(() => { 27 | setUploadProgress((prev) => { 28 | if (prev >= 95) { 29 | clearInterval(interval); 30 | return prev; 31 | } 32 | return prev + 5; 33 | }); 34 | }, 500); 35 | 36 | try { 37 | const formData = new FormData(); 38 | formData.append('case_name', caseName); 39 | formData.append('user_id', user.id); 40 | Array.from(files).forEach(file => formData.append('files', file)); 41 | 42 | const response = await fetch(`${noOcrApiUrl}/create_case`, { 43 | method: 'POST', 44 | body: formData, 45 | }); 46 | 47 | if (!response.ok) { 48 | throw new Error('Network response was not ok'); 49 | } 50 | 51 | const result = await response.json(); 52 | console.log('Upload successful:', result); 53 | 54 | setApiMessage(`Case created successfully: ${result.name}`); 55 | setUploadProgress(100); 56 | setTimeout(() => { 57 | setIsUploading(false); 58 | setCaseName(''); 59 | setFiles(null); 60 | setUploadProgress(0); 61 | navigate('/cases'); 62 | }, 500); 63 | } catch (error: unknown) { 64 | if (error instanceof Error) { 65 | console.error('Upload failed:', error); 66 | setApiMessage(`Upload failed: ${error.message}`); 67 | } else { 68 | console.error('Upload failed:', error); 69 | setApiMessage('Upload failed: An unknown error occurred.'); 70 | } 71 | setIsUploading(false); 72 | setUploadProgress(0); 73 | } 74 | }; 75 | 76 | const handleDrop = (e: React.DragEvent) => { 77 | e.preventDefault(); 78 | setFiles(e.dataTransfer.files); 79 | }; 80 | 81 | const handleDragOver = (e: React.DragEvent) => { 82 | e.preventDefault(); 83 | }; 84 | 85 | return ( 86 |
87 |

Create New Case

88 | 89 |
90 | {!user && ( 91 |
92 | You are not logged in. You can view this page, but you must sign in to actually create a new case. 93 |
94 | )} 95 | 96 |
97 | 100 | setCaseName(e.target.value)} 105 | className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 p-2" 106 | placeholder="Enter case name" 107 | required 108 | /> 109 |
110 | 111 |
112 | 113 |
118 |
119 |
120 | 131 |

or drag and drop

132 |
133 |

PDF files only

134 |
135 |
136 | {files && ( 137 |
138 |

139 | Selected {files.length} file(s) 140 |

141 |
142 | )} 143 |
144 | 145 | {isUploading && ( 146 |
147 |
151 |
152 | )} 153 | 154 | 161 |
162 | 163 | {apiMessage && ( 164 |
165 |

{apiMessage}

166 |
167 | )} 168 |
169 | ); 170 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/Navbar.tsx: -------------------------------------------------------------------------------- 1 | import { FileSearch, FolderPlus, Github } from 'lucide-react'; 2 | import { Link, useLocation } from 'react-router-dom'; 3 | import { LogoutButton } from './auth/LogoutButton'; 4 | import { useAuthStore } from '../stores/authStore'; 5 | 6 | export default function Navbar() { 7 | const location = useLocation(); 8 | const { user } = useAuthStore(); 9 | 10 | return ( 11 | 68 | ); 69 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/Search.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from 'react'; 2 | import { Search as SearchIcon } from 'lucide-react'; 3 | import { Case } from '../types/collection'; 4 | import { noOcrApiUrl } from '../config/api'; 5 | import { useAuthStore } from '../stores/authStore'; 6 | import { useLocation } from 'react-router-dom'; 7 | import { LoadingSpinner } from './shared/LoadingSpinner'; // Import the LoadingSpinner component 8 | 9 | // Define a type for search results 10 | type SearchResult = { 11 | image_base64: string; 12 | pdf_name: string; 13 | pdf_page: number; 14 | score: number; 15 | }; 16 | 17 | export default function Search() { 18 | const { user } = useAuthStore(); 19 | const [selectedCase, setSelectedCase] = useState(''); 20 | const [searchQuery, setSearchQuery] = useState(''); 21 | const [results, setResults] = useState([]); 22 | const [isSearching, setIsSearching] = useState(false); 23 | const [answers, setAnswers] = useState<{ [key: number]: { answer: string } }>({}); 24 | 25 | const [cases, setCases] = useState([]); 26 | const [isModalOpen, setIsModalOpen] = useState(false); 27 | const [modalImage, setModalImage] = useState(''); 28 | 29 | const location = useLocation(); 30 | 31 | useEffect(() => { 32 | const params = new URLSearchParams(location.search); 33 | const caseId = params.get('name'); 34 | if (caseId) { 35 | setSelectedCase(caseId); 36 | } 37 | }, [location]); 38 | 39 | useEffect(() => { 40 | async function fetchCases() { 41 | try { 42 | const response = await fetch(`${noOcrApiUrl}/get_cases?user_id=${user?.id}`); 43 | if (!response.ok) throw new Error('Network response was not ok'); 44 | const data = await response.json(); 45 | setCases(data.cases || []); 46 | } catch (error) { 47 | console.error('Error fetching cases:', error); 48 | } 49 | } 50 | 51 | fetchCases(); 52 | }, [user]); 53 | 54 | const handleSearch = async (e: React.FormEvent) => { 55 | e.preventDefault(); 56 | if (!selectedCase || !searchQuery) return; 57 | 58 | setIsSearching(true); 59 | setResults([]); 60 | setAnswers({}); 61 | 62 | try { 63 | const response = await fetch(`${noOcrApiUrl}/search`, { 64 | method: 'POST', 65 | headers: { 66 | 'Accept': 'application/json', 67 | }, 68 | body: new URLSearchParams({ 69 | user_query: searchQuery, 70 | user_id: user?.id || '', 71 | case_name: selectedCase, 72 | }), 73 | }); 74 | 75 | if (!response.ok) throw new Error('Network response was not ok'); 76 | const data = await response.json(); 77 | setResults(data.search_results || []); 78 | 79 | data.search_results.forEach((result: SearchResult, index: number) => { 80 | fetchAnswer(searchQuery, selectedCase, result.pdf_name, result.pdf_page) 81 | .then(answer => { 82 | setAnswers(prevAnswers => ({ ...prevAnswers, [index]: answer 
})); 83 | }); 84 | }); 85 | } catch (error) { 86 | console.error('Search failed:', error); 87 | } finally { 88 | setIsSearching(false); 89 | } 90 | }; 91 | 92 | const fetchAnswer = async (userQuery: string, caseName: string, pdfName: string, pdfPage: number) => { 93 | try { 94 | const response = await fetch(`${noOcrApiUrl}/vllm_call`, { 95 | method: 'POST', 96 | headers: { 97 | 'Accept': 'application/json', 98 | }, 99 | body: new URLSearchParams({ 100 | user_query: userQuery, 101 | user_id: user?.id || '', 102 | case_name: caseName, 103 | pdf_name: pdfName, 104 | pdf_page: pdfPage.toString(), 105 | }), 106 | }); 107 | 108 | if (!response.ok) throw new Error('Network response was not ok'); 109 | const data = await response.json(); 110 | return data; 111 | } catch (error) { 112 | console.error('Fetching answer failed:', error); 113 | return { answer: 'NA' }; 114 | } 115 | }; 116 | 117 | const openModal = (imageBase64: string) => { 118 | setModalImage(imageBase64); 119 | setIsModalOpen(true); 120 | }; 121 | 122 | const closeModal = () => { 123 | setIsModalOpen(false); 124 | setModalImage(''); 125 | }; 126 | 127 | return ( 128 |
129 |

AI Search

130 | 131 |
132 |
133 | 136 |
137 | 151 |
152 | 153 | 154 | 155 |
156 |
157 |
158 | 159 |
160 | setSearchQuery(e.target.value)} 164 | placeholder="Enter your search query..." 165 | className="block w-full rounded-md border-gray-300 pl-10 pr-3 py-2 shadow-sm focus:border-blue-500 focus:ring-blue-500" 166 | required 167 | /> 168 | 169 |
170 | 171 | 178 |
179 | 180 | {results.length > 0 && ( 181 |
182 |

Search Results

183 |
184 | {results.map((result, index) => ( 185 |
186 |
187 | {`Page openModal(result.image_base64)} 192 | /> 193 |
194 |
195 |
196 |

197 | Metadata: {result.pdf_name}, Page {result.pdf_page} 198 |

199 |
200 |
201 |

Answer:

202 |

203 | {answers[index] ? answers[index].answer : } 204 |

205 |
206 |
207 |
208 | ))} 209 |
210 |
211 | )} 212 | 213 | {isModalOpen && ( 214 |
215 |
216 | 222 | Enlarged view 227 |
228 |
229 | )} 230 |
231 | ); 232 | } 233 | -------------------------------------------------------------------------------- /no-ocr-ui/src/components/about/Feature.tsx: -------------------------------------------------------------------------------- 1 | // Feature component for highlighting key capabilities based on ColPali for PDF document embeddings. 2 | import { LucideIcon } from 'lucide-react'; 3 | 4 | interface FeatureProps { 5 | icon: LucideIcon; // Lucide icon component to represent the feature 6 | title: string; // Feature name/title 7 | description: string; // Feature description supporting markdown 8 | } 9 | 10 | export function Feature({ icon: Icon, title, description }: FeatureProps) { 11 | return ( 12 |
13 |
14 |
17 |
18 |

{description}

19 |
20 |
21 | ); 22 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/about/Features.tsx: -------------------------------------------------------------------------------- 1 | import { FileSearch, FolderPlus, Zap, Layers } from 'lucide-react'; 2 | import { Feature } from './Feature'; 3 | 4 | /** 5 | * Features showcase the simplified RAG lifecycle. 6 | * Removes direct mentions of images or LanceDB, 7 | * adds open source and multi-case features. 8 | */ 9 | export function Features() { 10 | return ( 11 |
12 |
13 |
14 |

15 | Remove Complexity from Your RAG Applications 16 |

17 |

18 | Next-Generation Document Search 19 |

20 |

21 | Powered by ColPali for document-focused retrieval, our platform simplifies data ingestion 22 | and harnesses advanced RAG technology for seamless PDF analysis. 23 |

24 |
25 | 26 | {/* 27 | Updated to four features, removing references to image or LanceDB speed, 28 | adding open source and multi-case features. 29 | */} 30 |
31 |
32 | 37 | 42 | 47 | 52 |
53 |
54 |
55 |
56 | ); 57 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/about/Hero.tsx: -------------------------------------------------------------------------------- 1 | import { FileSearch } from 'lucide-react'; 2 | import { Link } from 'react-router-dom'; 3 | 4 | /** 5 | * Hero showcases the main application's document-oriented RAG features 6 | * and invites users to explore our advanced AI search. 7 | */ 8 | export function Hero() { 9 | return ( 10 |
11 | 29 |
30 |
31 |
32 | 33 | 34 | Latest Features 35 | 36 | 37 | Document-First RAG 38 | 39 | 40 |
41 |

42 | Intelligent Document Search with RAG 43 |

44 |

45 | Explore an end-to-end solution for complex PDFs, powered by ColPali embeddings. Our AI understands 46 | your documents’ deep structure—making retrieval truly comprehensive. 47 |

48 |
49 | 53 | Get started 54 | 55 | 56 | Try AI Search 57 | 58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 | 66 |

AI-Assisted Search

67 |
68 |

69 | "Find all relevant references discussing inference costs in 2023 PDF reports." 70 |

71 |
72 |
73 |
74 |
75 | 78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 | ); 86 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/about/HowItWorks.tsx: -------------------------------------------------------------------------------- 1 | import { BookOpen, Search, MessageSquare } from 'lucide-react'; 2 | 3 | /** 4 | * HowItWorks outlines our simplified RAG approach: 5 | * - Step 1: Upload & Process Complex PDFs (creates a "case") 6 | * - Step 2: We process your case and make it searchable 7 | * - Step 3: Ask any question, now using the latest open source vision models 8 | */ 9 | export function HowItWorks() { 10 | return ( 11 |
12 |
13 |
14 |

15 | How It Works 16 |

17 |

18 | Our platform adopts a streamlined RAG approach. Simply create a case by uploading your PDFs, let us process them, then ask any question—even about visual elements—using best-in-class open source reasoning models. 19 |

20 | 21 |
22 |
23 |
24 | 25 |
26 |
27 |
28 | 31 |
32 |
33 |

34 | Upload & Process Complex PDFs 35 |

36 |

37 | Create a new case by uploading your most challenging PDF documents. Our system treats each PDF page as an image and generates ColPali embeddings for it directly, without a separate OCR step.

39 |
40 |
41 |
42 | 43 |
44 |
45 | 46 |
47 |
48 |
49 | 52 |
53 |
54 |

55 | We Make It Searchable 56 |

57 |

58 | Once your case is processed, you can quickly run text-based queries to find relevant pages and references—no matter how intricate the PDF layout might be. 59 |

60 |
61 |
62 |
63 | 64 |
65 |
66 | 67 |
68 |
69 |
70 | 73 |
74 |
75 |

76 | Ask Any Visual-Based Question 77 |

78 |

79 | Our approach quickly surfaces relevant pages, then a specialized open source vision model refines the final answer. Even if your PDFs have charts or diagrams, you’ll receive context-aware insights. 80 |

81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 | ); 89 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/auth/AuthGuard.tsx: -------------------------------------------------------------------------------- 1 | import { useAuth } from '../../hooks/useAuth'; 2 | 3 | interface AuthGuardProps { 4 | children: React.ReactNode; 5 | } 6 | 7 | export function AuthGuard({ children }: AuthGuardProps) { 8 | const { user, isLoading } = useAuth(); 9 | 10 | if (isLoading) { 11 | return ( 12 |
13 |
14 |
15 | ); 16 | } 17 | 18 | return user ? <>{children} : null; 19 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/auth/LoginForm.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from 'react'; 2 | import { useNavigate, Link } from 'react-router-dom'; 3 | import { supabase } from '../../lib/supabase'; 4 | 5 | export function LoginForm() { 6 | const [email, setEmail] = useState(''); 7 | const [password, setPassword] = useState(''); 8 | const [isLoading, setIsLoading] = useState(false); 9 | const [error, setError] = useState(null); 10 | const navigate = useNavigate(); 11 | 12 | const handleSubmit = async (e: React.FormEvent) => { 13 | e.preventDefault(); 14 | setIsLoading(true); 15 | setError(null); 16 | 17 | try { 18 | const { error } = await supabase.auth.signInWithPassword({ 19 | email, 20 | password, 21 | }); 22 | 23 | if (error) throw error; 24 | navigate('/'); 25 | } catch (err) { 26 | setError(err instanceof Error ? err.message : 'An error occurred'); 27 | } finally { 28 | setIsLoading(false); 29 | } 30 | }; 31 | 32 | return ( 33 |
34 |
35 |
36 |

37 | Sign in to your account 38 |

39 |
40 |
41 | {error && ( 42 |
43 |
{error}
44 |
45 | )} 46 |
47 |
48 | setEmail(e.target.value)} 53 | className="appearance-none rounded-none relative block w-full px-3 py-2 border border-gray-300 placeholder-gray-500 text-gray-900 rounded-t-md focus:outline-none focus:ring-blue-500 focus:border-blue-500 focus:z-10 sm:text-sm" 54 | placeholder="Email address" 55 | /> 56 |
57 |
58 | setPassword(e.target.value)} 63 | className="appearance-none rounded-none relative block w-full px-3 py-2 border border-gray-300 placeholder-gray-500 text-gray-900 rounded-b-md focus:outline-none focus:ring-blue-500 focus:border-blue-500 focus:z-10 sm:text-sm" 64 | placeholder="Password" 65 | /> 66 |
67 |
68 | 69 |
70 | 77 |
78 | 79 |
80 | 81 | Don't have an account? Sign up 82 | 83 |
84 |
85 |
86 |
87 | ); 88 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/auth/LogoutButton.tsx: -------------------------------------------------------------------------------- 1 | import { useNavigate } from 'react-router-dom'; 2 | import { LogOut } from 'lucide-react'; 3 | import { supabase } from '../../lib/supabase'; 4 | 5 | export function LogoutButton() { 6 | const navigate = useNavigate(); 7 | 8 | const handleLogout = async () => { 9 | await supabase.auth.signOut(); 10 | navigate('/login'); 11 | }; 12 | 13 | return ( 14 | 21 | ); 22 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/auth/RegisterForm.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from 'react'; 2 | import { useNavigate, Link } from 'react-router-dom'; 3 | import { supabase } from '../../lib/supabase'; 4 | 5 | export function RegisterForm() { 6 | const [email, setEmail] = useState(''); 7 | const [password, setPassword] = useState(''); 8 | const [confirmPassword, setConfirmPassword] = useState(''); 9 | const [isLoading, setIsLoading] = useState(false); 10 | const [error, setError] = useState(null); 11 | const navigate = useNavigate(); 12 | 13 | const handleSubmit = async (e: React.FormEvent) => { 14 | e.preventDefault(); 15 | setIsLoading(true); 16 | setError(null); 17 | 18 | if (password !== confirmPassword) { 19 | setError("Passwords don't match"); 20 | setIsLoading(false); 21 | return; 22 | } 23 | 24 | try { 25 | const { error } = await supabase.auth.signUp({ 26 | email, 27 | password, 28 | }); 29 | 30 | if (error) throw error; 31 | 32 | // Show success message and redirect to login 33 | alert('Registration successful! Please check your email to verify your account.'); 34 | navigate('/login'); 35 | } catch (err) { 36 | setError(err instanceof Error ? err.message : 'An error occurred'); 37 | } finally { 38 | setIsLoading(false); 39 | } 40 | }; 41 | 42 | return ( 43 |
44 |
45 |
46 |

47 | Create your account 48 |

49 |
50 |
51 | {error && ( 52 |
53 |
{error}
54 |
55 | )} 56 |
57 |
58 | setEmail(e.target.value)} 63 | className="appearance-none rounded-none relative block w-full px-3 py-2 border border-gray-300 placeholder-gray-500 text-gray-900 rounded-t-md focus:outline-none focus:ring-blue-500 focus:border-blue-500 focus:z-10 sm:text-sm" 64 | placeholder="Email address" 65 | /> 66 |
67 |
68 | setPassword(e.target.value)} 73 | className="appearance-none rounded-none relative block w-full px-3 py-2 border border-gray-300 placeholder-gray-500 text-gray-900 focus:outline-none focus:ring-blue-500 focus:border-blue-500 focus:z-10 sm:text-sm" 74 | placeholder="Password" 75 | minLength={6} 76 | /> 77 |
78 |
79 | setConfirmPassword(e.target.value)} 84 | className="appearance-none rounded-none relative block w-full px-3 py-2 border border-gray-300 placeholder-gray-500 text-gray-900 rounded-b-md focus:outline-none focus:ring-blue-500 focus:border-blue-500 focus:z-10 sm:text-sm" 85 | placeholder="Confirm password" 86 | minLength={6} 87 | /> 88 |
89 |
90 | 91 |
92 | 99 |
100 | 101 |
102 | 103 | Already have an account? Sign in 104 | 105 |
106 |
107 |
108 |
109 | ); 110 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/collection/FileUpload.tsx: -------------------------------------------------------------------------------- 1 | import { Upload } from 'lucide-react'; 2 | 3 | interface FileUploadProps { 4 | files: FileList | null; 5 | onFilesChange: (files: FileList | null) => void; 6 | } 7 | 8 | export function FileUpload({ files, onFilesChange }: FileUploadProps) { 9 | return ( 10 |
11 | 14 |
15 |
16 | 17 |
18 | 30 |

or drag and drop

31 |
32 |

PDF files only

33 |
34 |
35 | {files && ( 36 |
37 |

38 | Selected {files.length} file(s) 39 |

40 |
41 | )} 42 |
43 | ); 44 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/collection/UploadProgress.tsx: -------------------------------------------------------------------------------- 1 | import { Loader2 } from 'lucide-react'; 2 | 3 | interface UploadProgressProps { 4 | progress: number; 5 | isUploading: boolean; 6 | } 7 | 8 | export function UploadProgress({ progress, isUploading }: UploadProgressProps) { 9 | if (!isUploading) return null; 10 | 11 | return ( 12 |
13 |
14 |
18 |
19 |
20 | 21 | Processing... 22 |
23 |
24 | ); 25 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/collections/CaseCard.tsx: -------------------------------------------------------------------------------- 1 | import { FileText, Trash2 } from 'lucide-react'; 2 | import { useState, useEffect } from 'react'; 3 | import { useAuthStore } from '../../stores/authStore'; 4 | import { noOcrApiUrl } from '../../config/api'; 5 | import { Link } from 'react-router-dom'; 6 | 7 | interface CaseCardProps { 8 | caseItem: { 9 | name: string; 10 | status: string; 11 | number_of_PDFs: number; 12 | files: string[]; 13 | }; 14 | } 15 | 16 | export function CaseCard({ caseItem }: CaseCardProps) { 17 | const { user } = useAuthStore(); 18 | const [isDeleting, setIsDeleting] = useState(false); 19 | const [apiMessage, setApiMessage] = useState(null); 20 | const [status, setStatus] = useState(caseItem.status); 21 | 22 | useEffect(() => { 23 | let interval: NodeJS.Timeout; 24 | 25 | if (status === 'processing') { 26 | interval = setInterval(async () => { 27 | try { 28 | const response = await fetch(`${noOcrApiUrl}/get_case/${caseItem.name}?user_id=${user.id}`); 29 | const data = await response.json(); 30 | setStatus(data.status); 31 | 32 | if (data.status === 'done' || data.status === 'error') { 33 | clearInterval(interval); 34 | } 35 | } catch (error) { 36 | console.error('Error fetching case status:', error); 37 | } 38 | }, 5000); // Poll every 5 seconds 39 | } 40 | 41 | return () => clearInterval(interval); 42 | }, [status, caseItem.name, user.id]); 43 | 44 | const handleDelete = async () => { 45 | // If not signed in, block the actual delete action: 46 | if (!user) { 47 | setApiMessage('You must be logged in to delete a case.'); 48 | return; 49 | } 50 | 51 | if (!window.confirm('Are you sure you want to delete this case?')) return; 52 | 53 | setIsDeleting(true); 54 | try { 55 | const response = await fetch(`${noOcrApiUrl}/delete_case/${caseItem.name}?user_id=${user.id}`, { 56 | method: 'DELETE', 57 | }); 58 | 59 | if (!response.ok) throw new Error('Failed to delete case'); 60 | 61 | // Refresh the page after successful deletion 62 | window.location.reload(); 63 | } catch (error) { 64 | console.error('Error deleting case:', error); 65 | setApiMessage('Failed to delete case'); 66 | } finally { 67 | setIsDeleting(false); 68 | } 69 | }; 70 | 71 | const getStatusColor = () => { 72 | switch (status) { 73 | case 'done': 74 | return 'text-green-600'; 75 | case 'processing': 76 | return 'text-yellow-600'; 77 | case 'error': 78 | return 'text-red-600'; 79 | default: 80 | return 'text-gray-600'; 81 | } 82 | }; 83 | 84 | return ( 85 |
86 |
87 |
88 | 89 | 90 | {caseItem.name} 91 | 92 |
93 | 94 |
95 |

96 | Status: {status} 97 | {status === 'processing' && ( 98 | 99 | 100 | 101 | 102 | )} 103 |

104 |

105 | {caseItem.number_of_PDFs} PDF{caseItem.number_of_PDFs !== 1 ? 's' : ''} 106 |

107 |
    108 | {caseItem.files.map((file, index) => ( 109 |
  • {file}
  • 110 | ))} 111 |
112 |
113 | 114 | {user && ( 115 | 122 | )} 123 | 124 | {apiMessage && ( 125 |
126 |

{apiMessage}

127 |
128 | )} 129 |
130 |
131 | ); 132 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/collections/CaseList.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from 'react'; 2 | import { Case } from '../../types/collection'; 3 | import { CaseCard } from './CaseCard'; 4 | import { LoadingSpinner } from '../shared/LoadingSpinner'; 5 | import { EmptyState } from '../shared/EmptyState'; 6 | import { noOcrApiUrl } from '../../config/api'; 7 | import { useAuthStore } from '../../stores/authStore'; 8 | 9 | export function CaseList() { 10 | const [cases, setCases] = useState([]); 11 | const [isLoading, setIsLoading] = useState(true); 12 | const user = useAuthStore((state) => state.user); 13 | 14 | useEffect(() => { 15 | async function fetchCases() { 16 | if (!user) { 17 | setIsLoading(false); 18 | return; 19 | } 20 | 21 | try { 22 | const response = await fetch(`${noOcrApiUrl}/get_cases?user_id=${user.id}`); 23 | if (!response.ok) throw new Error('Network response was not ok'); 24 | const data = await response.json(); 25 | setCases(data.cases || []); 26 | } catch (error) { 27 | console.error('Error fetching cases:', error); 28 | } finally { 29 | setIsLoading(false); 30 | } 31 | } 32 | 33 | fetchCases(); 34 | }, [user]); 35 | 36 | if (isLoading) return ; 37 | 38 | if (cases.length === 0) { 39 | return ( 40 | 46 | ); 47 | } 48 | 49 | return ( 50 |
51 | {cases.map((caseItem) => ( 52 | 53 | ))} 54 |
55 | ); 56 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/layout/NavLink.tsx: -------------------------------------------------------------------------------- 1 | import { Link, useLocation } from 'react-router-dom'; 2 | 3 | interface NavLinkProps { 4 | to: string; 5 | icon: React.ReactNode; 6 | label: string; 7 | } 8 | 9 | export function NavLink({ to, icon, label }: NavLinkProps) { 10 | const location = useLocation(); 11 | const isActive = location.pathname === to; 12 | 13 | return ( 14 | 22 | {icon} 23 | {label} 24 | 25 | ); 26 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/layout/Navbar.tsx: -------------------------------------------------------------------------------- 1 | import { FileSearch, FolderPlus, Info, Library, Github } from 'lucide-react'; 2 | import { Link } from 'react-router-dom'; 3 | import { LogoutButton } from '../auth/LogoutButton'; 4 | import { useAuthStore } from '../../stores/authStore'; 5 | import { NavLink } from './NavLink'; 6 | 7 | export default function Navbar() { 8 | const { user } = useAuthStore(); 9 | 10 | return ( 11 | 79 | ); 80 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/search/CollectionSelect.tsx: -------------------------------------------------------------------------------- 1 | import { Case } from '../../types/collection'; 2 | 3 | interface CaseSelectProps { 4 | cases: Case[]; 5 | selectedCase: string; 6 | onCaseChange: (caseId: string) => void; 7 | } 8 | 9 | export function CaseSelect({ 10 | cases, 11 | selectedCase, 12 | onCaseChange, 13 | }: CaseSelectProps) { 14 | return ( 15 |
16 | 19 | 33 |
34 | ); 35 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/search/SearchResults.tsx: -------------------------------------------------------------------------------- 1 | import { SearchResult } from '../../types/collection'; 2 | 3 | interface SearchResultsProps { 4 | results: SearchResult[]; 5 | } 6 | 7 | export function SearchResults({ results }: SearchResultsProps) { 8 | if (results.length === 0) return null; 9 | 10 | return ( 11 |
12 | Search Results
13 |
14 | {results.map((result, index) => (
15 |
19 | {result.documentName}
20 | Page {result.pageNumber}
21 | {result.excerpt}
22 |
23 | Relevance: {(result.relevanceScore * 100).toFixed(1)}%
24 |
25 |
26 | ))}
27 |
28 |
29 | ); 30 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/shared/EmptyState.tsx: -------------------------------------------------------------------------------- 1 | import { FolderPlus } from 'lucide-react'; 2 | import { Link } from 'react-router-dom'; 3 | 4 | interface EmptyStateProps { 5 | title: string; 6 | description: string; 7 | actionLink: string; 8 | actionText: string; 9 | } 10 | 11 | export function EmptyState({ title, description, actionLink, actionText }: EmptyStateProps) { 12 | return ( 13 |
14 |
15 | {title}
16 | {description}
17 |
18 |
22 | {actionText}
23 |
24 |
25 |
26 | ); 27 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/components/shared/LoadingSpinner.tsx: -------------------------------------------------------------------------------- 1 | export function LoadingSpinner() { 2 | return ( 3 |
4 |
5 |
6 | ); 7 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/config/api.ts: -------------------------------------------------------------------------------- 1 | export const noOcrApiUrl = import.meta.env.VITE_REACT_APP_API_URI; -------------------------------------------------------------------------------- /no-ocr-ui/src/config/supabase.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from '@supabase/supabase-js'; 2 | 3 | const supabaseUrl = import.meta.env.VITE_SUPABASE_URL; 4 | const supabaseAnonKey = import.meta.env.VITE_SUPABASE_ANON_KEY; 5 | 6 | if (!supabaseUrl || !supabaseAnonKey) { 7 | throw new Error('Missing Supabase environment variables'); 8 | } 9 | 10 | export const supabase = createClient(supabaseUrl, supabaseAnonKey); -------------------------------------------------------------------------------- /no-ocr-ui/src/env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | 3 | interface ImportMetaEnv { 4 | readonly VITE_SUPABASE_URL: string 5 | readonly VITE_SUPABASE_ANON_KEY: string 6 | readonly RAILWAY_TOKEN: string 7 | readonly MODAL_TOKEN_ID: string 8 | readonly MODAL_TOKEN_SECRET: string 9 | readonly VITE_REACT_APP_API_URI: string 10 | } 11 | 12 | interface ImportMeta { 13 | readonly env: ImportMetaEnv 14 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/hooks/useAuth.ts: -------------------------------------------------------------------------------- 1 | import { useEffect } from 'react'; 2 | import { useNavigate } from 'react-router-dom'; 3 | import { useAuthStore } from '../stores/authStore'; 4 | 5 | export function useAuth() { 6 | const { user, isLoading } = useAuthStore(); 7 | const navigate = useNavigate(); 8 | 9 | useEffect(() => { 10 | if (!isLoading && !user) { 11 | navigate('/login'); 12 | } 13 | }, [user, isLoading, navigate]); 14 | 15 | return { user, isLoading }; 16 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/index.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | -------------------------------------------------------------------------------- /no-ocr-ui/src/lib/supabase.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from '@supabase/supabase-js'; 2 | 3 | // Provide default values for development to prevent initialization errors 4 | const supabaseUrl = import.meta.env.VITE_SUPABASE_URL || 'http://localhost:54321'; 5 | const supabaseAnonKey = import.meta.env.VITE_SUPABASE_ANON_KEY || 'placeholder-key'; 6 | 7 | export const supabase = createClient(supabaseUrl, supabaseAnonKey); -------------------------------------------------------------------------------- /no-ocr-ui/src/main.tsx: -------------------------------------------------------------------------------- 1 | import { StrictMode } from 'react'; 2 | import { createRoot } from 'react-dom/client'; 3 | import App from './App.tsx'; 4 | import './index.css'; 5 | 6 | createRoot(document.getElementById('root')!).render( 7 | 8 | 9 | 10 | ); 11 | -------------------------------------------------------------------------------- /no-ocr-ui/src/stores/authStore.ts: -------------------------------------------------------------------------------- 1 | import { create } from 'zustand'; 2 | 
import { AuthState } from '../types/auth'; 3 | import { supabase } from '../lib/supabase'; 4 | 5 | export const useAuthStore = create((set) => ({ 6 | user: null, 7 | isLoading: true, 8 | })); 9 | 10 | // Initialize auth state 11 | export const initializeAuth = async () => { 12 | try { 13 | const { data: { session } } = await supabase.auth.getSession(); 14 | useAuthStore.setState({ 15 | user: session?.user ?? null, 16 | isLoading: false, 17 | }); 18 | 19 | // Set up auth state change listener 20 | supabase.auth.onAuthStateChange((_event, session) => { 21 | useAuthStore.setState({ 22 | user: session?.user ?? null, 23 | isLoading: false, 24 | }); 25 | }); 26 | } catch (error) { 27 | console.error('Error initializing auth:', error); 28 | useAuthStore.setState({ isLoading: false }); 29 | } 30 | }; -------------------------------------------------------------------------------- /no-ocr-ui/src/types/auth.ts: -------------------------------------------------------------------------------- 1 | export interface User { 2 | id: string; 3 | email: string; 4 | created_at: string; 5 | } 6 | 7 | export interface AuthState { 8 | user: User | null; 9 | isLoading: boolean; 10 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/types/collection.ts: -------------------------------------------------------------------------------- 1 | export interface Case { 2 | name: string; 3 | status: string; 4 | number_of_PDFs: number; 5 | files: string[]; 6 | documentCount: number; 7 | createdAt: string; 8 | id: string; 9 | } 10 | 11 | export interface SearchResult { 12 | documentName: string; 13 | excerpt: string; 14 | relevanceScore: number; 15 | pageNumber: number; 16 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/types/index.ts: -------------------------------------------------------------------------------- 1 | export interface Case { 2 | id: string; 3 | name: string; 4 | documentCount: number; 5 | createdAt: string; 6 | } 7 | 8 | export interface SearchResult { 9 | documentName: string; 10 | excerpt: string; 11 | relevanceScore: number; 12 | pageNumber: number; 13 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/utils/date.ts: -------------------------------------------------------------------------------- 1 | export function formatDate(dateString: string): string { 2 | const date = new Date(dateString); 3 | if (isNaN(date.getTime())) { 4 | return 'Invalid date'; 5 | } 6 | return new Intl.RelativeTimeFormat('en', { numeric: 'auto' }).format( 7 | Math.ceil((date.getTime() - Date.now()) / (1000 * 60 * 60 * 24)), 8 | 'day' 9 | ); 10 | } -------------------------------------------------------------------------------- /no-ocr-ui/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /no-ocr-ui/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: ['./index.html', './src/**/*.{js,ts,jsx,tsx}'], 4 | theme: { 5 | extend: { 6 | colors: { 7 | primary: '#FFD700', // Yellow 8 | secondary: '#000000', // Black 9 | background: '#FFFFFF', // White 10 | }, 11 | fontFamily: { 12 | heading: ['"Roboto Mono"', 'monospace'], 13 | body: ['"Open Sans"', 'sans-serif'], 14 | }, 15 | }, 16 | }, 17 | plugins: [], 18 | }; 19 | 
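A note on no-ocr-ui/src/stores/authStore.ts above: it exports initializeAuth, but the main.tsx listed earlier never calls it (it may well be invoked from App.tsx, which is not included in this section). A minimal sketch of one way to wire it into the entry point, reusing only modules that appear in this repo — an illustration under that assumption, not the project's actual main.tsx:

import { StrictMode } from 'react';
import { createRoot } from 'react-dom/client';
import App from './App.tsx';
import { initializeAuth } from './stores/authStore';
import './index.css';

// Kick off session restoration before rendering; the store sets `user` and
// flips `isLoading` to false once Supabase returns the current session, and
// the onAuthStateChange listener keeps the store in sync afterwards.
initializeAuth();

createRoot(document.getElementById('root')!).render(
  <StrictMode>
    <App />
  </StrictMode>
);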
-------------------------------------------------------------------------------- /no-ocr-ui/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "useDefineForClassFields": true, 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "module": "ESNext", 7 | "skipLibCheck": true, 8 | 9 | /* Bundler mode */ 10 | "moduleResolution": "bundler", 11 | "allowImportingTsExtensions": true, 12 | "isolatedModules": true, 13 | "moduleDetection": "force", 14 | "noEmit": true, 15 | "jsx": "react-jsx", 16 | 17 | /* Linting */ 18 | "strict": true, 19 | "noUnusedLocals": true, 20 | "noUnusedParameters": true, 21 | "noFallthroughCasesInSwitch": true 22 | }, 23 | "include": ["src"] 24 | } 25 | -------------------------------------------------------------------------------- /no-ocr-ui/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { "path": "./tsconfig.app.json" }, 5 | { "path": "./tsconfig.node.json" } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /no-ocr-ui/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "lib": ["ES2023"], 5 | "module": "ESNext", 6 | "skipLibCheck": true, 7 | 8 | /* Bundler mode */ 9 | "moduleResolution": "bundler", 10 | "allowImportingTsExtensions": true, 11 | "isolatedModules": true, 12 | "moduleDetection": "force", 13 | "noEmit": true, 14 | 15 | /* Linting */ 16 | "strict": true, 17 | "noUnusedLocals": true, 18 | "noUnusedParameters": true, 19 | "noFallthroughCasesInSwitch": true 20 | }, 21 | "include": ["vite.config.ts"] 22 | } 23 | -------------------------------------------------------------------------------- /no-ocr-ui/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite'; 2 | import react from '@vitejs/plugin-react'; 3 | 4 | // https://vitejs.dev/config/ 5 | export default defineConfig({ 6 | plugins: [react()], 7 | optimizeDeps: { 8 | exclude: ['lucide-react'], 9 | }, 10 | }); 11 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | line-length = 120 3 | select = ["E", "F", "I", "N", "Q", "W"] 4 | exclude = ["colpali"] 5 | fix = true 6 | --------------------------------------------------------------------------------
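For quick reference, a usage sketch of the formatDate helper in no-ocr-ui/src/utils/date.ts. The import path assumes a caller inside no-ocr-ui/src, and the exact relative-time strings depend on the runtime's Intl data, so treat both as assumptions:

// Hypothetical caller somewhere under no-ocr-ui/src (e.g. a component file).
import { formatDate } from './utils/date';

const DAY_MS = 24 * 60 * 60 * 1000;

// formatDate takes Math.ceil of the day difference from "now" and renders it
// with Intl.RelativeTimeFormat('en', { numeric: 'auto' }).
console.log(formatDate(new Date(Date.now() - DAY_MS).toISOString()));     // e.g. "yesterday"
console.log(formatDate(new Date(Date.now() + 3 * DAY_MS).toISOString())); // e.g. "in 3 days"
console.log(formatDate('not-a-date'));                                    // "Invalid date"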