├── .env.example ├── .gitignore ├── .python-version ├── Dockerfile ├── LICENSE ├── README.md ├── assets └── cygen logo.png ├── docker-compose.yml ├── docker └── entrypoint.sh ├── pyproject.toml ├── src ├── main.py ├── router │ ├── __init__.py │ ├── chat.py │ └── upload.py ├── settings.py └── utils │ ├── __init__.py │ ├── background_tasks.py │ ├── llm.py │ ├── pdf_processor.py │ ├── text_chunking.py │ └── vector_store.py ├── start.sh ├── streamlit ├── Dockerfile ├── app.py ├── config.toml ├── run-docker.sh ├── run.sh └── utils.py └── uv.lock /.env.example: -------------------------------------------------------------------------------- 1 | # Server Settings 2 | DEBUG=false 3 | API_V1_PREFIX=/api/v1 4 | 5 | # Processing Settings 6 | MAX_WORKERS=4 7 | CHUNK_SIZE=512 8 | CHUNK_OVERLAP=50 9 | 10 | # Database Settings 11 | MONGODB_URL=mongodb://mongodb:27017 12 | MONGODB_DB_NAME=rag_system 13 | 14 | # Vector Store Settings 15 | QDRANT_URL=http://qdrant:6333 16 | QDRANT_API_KEY=your_qdrant_api_key 17 | COLLECTION_NAME=documents 18 | 19 | # LLM Settings 20 | GROQ_API_KEY=your_groq_api_key 21 | MODEL_NAME=mixtral-8x7b-32768 22 | TEMPERATURE=0.7 23 | 24 | # PDF Processing 25 | OCR_ENABLED=true 26 | PDF_UPLOAD_DIR=uploads -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | 176 | # Additional files 177 | cybersec-report/ 178 | .cursorrules 179 | *.DS_Store 180 | uploads/ -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use Python 3.11 slim image 2 | FROM python:3.11-slim 3 | 4 | # Set working directory 5 | WORKDIR /app 6 | 7 | # Set environment variables 8 | ENV PYTHONUNBUFFERED=1 \ 9 | PYTHONDONTWRITEBYTECODE=1 \ 10 | UV_SYSTEM_PYTHON=1 \ 11 | PATH="/root/.local/bin:$PATH" 12 | 13 | # Install system dependencies 14 | RUN apt-get update && apt-get install -y --no-install-recommends \ 15 | build-essential \ 16 | libpq-dev \ 17 | curl \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | # Install uv 21 | RUN curl -LsSf https://astral.sh/uv/install.sh | sh 22 | 23 | # Copy project files 24 | COPY pyproject.toml . 25 | COPY . . 26 | 27 | # Create required directories 28 | RUN mkdir -p uploads logs && chmod 777 uploads logs 29 | 30 | # Create venv and install dependencies 31 | RUN uv venv && \ 32 | . .venv/bin/activate && \ 33 | uv pip install -e . 34 | 35 | # Development mode: Use entrypoint script 36 | COPY docker/entrypoint.sh /entrypoint.sh 37 | RUN chmod +x /entrypoint.sh 38 | 39 | ENTRYPOINT ["/entrypoint.sh"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # CyGen: Self-Hosted LLM for Cybersecurity Analysis 🛡️ 4 | 5 | ![Version](https://img.shields.io/badge/version-1.0.0-blue.svg?cacheSeconds=2592000) 6 | ![Python](https://img.shields.io/badge/Python-3.11+-blue.svg) 7 | ![FastAPI](https://img.shields.io/badge/FastAPI-0.103.0-009688.svg) 8 | ![Streamlit](https://img.shields.io/badge/Streamlit-1.27.0-FF4B4B.svg) 9 | ![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg) 10 | 11 |
12 | 13 |

14 | CyGen Banner 15 |

16 | 17 | CyGen is a powerful Retrieval-Augmented Generation (RAG) system built with FastAPI, MongoDB, Qdrant, and Groq LLM, featuring a Streamlit frontend for seamless interaction. This system allows you to upload PDF documents, process them intelligently, and have natural language conversations about their content. 18 | 19 | ## ✨ Features 20 | 21 | - **📄 Advanced PDF Document Ingestion** 22 | - Multi-threaded PDF processing 23 | - Intelligent text chunking with configurable parameters 24 | - Background task queue for non-blocking operations 25 | - Progress tracking for document processing 26 | 27 | - **🔍 Smart Vector Search** 28 | - Semantic similarity search using embeddings 29 | - Context-aware document retrieval 30 | - Configurable relevance thresholds 31 | - Metadata-enhanced document chunks 32 | 33 | - **💬 Interactive Chat Interface** 34 | - Real-time chat with HTTP POST endpoint 35 | - Context window management 36 | - Conversation history with MongoDB 37 | - Automatic conversation titles generation 38 | 39 | - **🧠 Groq LLM Integration** 40 | - Fast inference with 8k context window 41 | - Optimized prompting strategy 42 | - Balanced context retrieval 43 | - Temperature control for response diversity 44 | 45 | - **🖥️ User-friendly Web UI** 46 | - Document upload with progress indicators 47 | - Conversation management 48 | - Responsive design 49 | - Real-time chat updates 50 | 51 | ## 🏗️ System Architecture 52 | 53 |
54 | 55 | ```mermaid 56 | flowchart TD 57 | subgraph Client 58 | UI[Streamlit Frontend] 59 | end 60 | 61 | subgraph Backend 62 | API[FastAPI Backend] 63 | TaskQueue[Background Task Queue] 64 | VectorDB[(Qdrant Vector DB)] 65 | MongoDB[(MongoDB)] 66 | LLM[Groq LLM API] 67 | end 68 | 69 | subgraph Processing 70 | PDF[PDF Processor] 71 | Chunker[Text Chunker] 72 | Embedder[Embedding Model] 73 | end 74 | 75 | %% Client to Backend interactions 76 | UI -->|1. Upload PDF| API 77 | UI -->|5. Send Query| API 78 | API -->|8. Stream Response| UI 79 | 80 | %% Document Processing Flow 81 | API -->|2. Process Document| TaskQueue 82 | TaskQueue -->|3. Extract & Chunk| PDF 83 | PDF -->|3.1. Split Text| Chunker 84 | Chunker -->|3.2. Generate Embeddings| Embedder 85 | Embedder -->|3.3. Store Vectors| VectorDB 86 | Embedder -->|3.4. Store Metadata| MongoDB 87 | 88 | %% Query Processing Flow 89 | API -->|6. Retrieve Context| VectorDB 90 | API -->|6.1. Get History| MongoDB 91 | API -->|7. Generate Response| LLM 92 | VectorDB -->|6.2. Relevant Chunks| API 93 | MongoDB -->|6.3. Conversation History| API 94 | 95 | %% Styles 96 | classDef primary fill:#4527A0,stroke:#4527A0,color:white,stroke-width:2px 97 | classDef secondary fill:#7E57C2,stroke:#7E57C2,color:white 98 | classDef database fill:#1A237E,stroke:#1A237E,color:white 99 | classDef processor fill:#FF7043,stroke:#FF7043,color:white 100 | classDef client fill:#00ACC1,stroke:#00ACC1,color:white 101 | 102 | class API,TaskQueue primary 103 | class PDF,Chunker,Embedder processor 104 | class VectorDB,MongoDB database 105 | class LLM secondary 106 | class UI client 107 | ``` 108 | 109 |
110 | 111 | The system comprises several key components that work together: 112 | 113 | - **FastAPI Backend** 114 | - RESTful API endpoints and background task processing 115 | - Asynchronous request handling for high concurrency 116 | - Dependency injection for clean service management 117 | - Error handling and logging 118 | 119 | - **MongoDB** 120 | - Conversation history storage 121 | - Document metadata and status tracking 122 | - Asynchronous operations with Motor client 123 | - Indexed collections for fast retrieval 124 | 125 | - **Qdrant Vector Database** 126 | - High-performance vector storage and retrieval 127 | - Scalable embedding storage 128 | - Similarity search with metadata filtering 129 | - Optimized for semantic retrieval 130 | 131 | - **Groq LLM Integration** 132 | - Ultra-fast inference for responsive conversation 133 | - 8k token context window 134 | - Adaptive system prompts based on query context 135 | - Clean API integration with error handling 136 | 137 | - **Streamlit Frontend** 138 | - Intuitive user interface for document uploads 139 | - Conversation management and history 140 | - Real-time chat interaction 141 | - Mobile-responsive design 142 | 143 | ## ⚙️ Technical Details 144 | 145 | ### PDF Processing Pipeline 146 | 147 | Our PDF processing pipeline is designed for efficiency and accuracy: 148 | 149 | 1. **Text Extraction**: Extract raw text from PDF documents using PyPDF2 150 | 2. **Text Cleaning**: Remove artifacts and normalize text 151 | 3. **Chunking Strategy**: Implement recursive chunking with smart boundary detection 152 | 4. **Metadata Enrichment**: Add page numbers, file paths, and other metadata 153 | 5. **Vector Embedding**: Generate embeddings for each chunk 154 | 6. **Storage**: Store vectors in Qdrant and metadata in MongoDB 155 | 156 | ### RAG Implementation 157 | 158 | The RAG system follows a sophisticated approach to content retrieval: 159 | 160 | 1. 
**Query Analysis**: Analyze user query for intent and keywords 161 | 2. **Context Retrieval**: Retrieve relevant document chunks from vector store 162 | 3. **Threshold Filtering**: Filter results based on similarity score threshold 163 | 4. **Context Assembly**: Combine retrieved chunks with conversation history 164 | 5. **Prompt Construction**: Build prompt with system instructions and context 165 | 6. **LLM Generation**: Generate response using Groq LLM 166 | 7. **Response Delivery**: Deliver response to user in real-time 167 | 168 | ## 🚀 Getting Started 169 | 170 | ### Prerequisites 171 | 172 | - Docker and Docker Compose 173 | - Python 3.11+ 174 | - uv package manager (recommended for local development) 175 | - Groq API key 176 | - MongoDB instance (local or Atlas) 177 | - Qdrant instance (local or cloud) 178 | 179 | ### Environment Setup 180 | 181 | 1. Clone the repository: 182 | ```bash 183 | git clone https://github.com/yourusername/cygen.git 184 | cd cygen 185 | ``` 186 | 187 | 2. Copy the example environment file: 188 | ```bash 189 | cp .env.example .env 190 | ``` 191 | 192 | 3. Update the following variables in `.env`: 193 | ``` 194 | GROQ_API_KEY=your_groq_api_key 195 | MONGODB_URL=mongodb://username:password@host:port/db_name 196 | QDRANT_URL=http://qdrant_host:port 197 | MAX_WORKERS=4 198 | CHUNK_SIZE=512 199 | CHUNK_OVERLAP=50 200 | TOP_K=5 201 | RAG_THRESHOLD=0.75 202 | TEMPERATURE=0.7 203 | N_LAST_MESSAGE=5 204 | ``` 205 | 206 | ### Running the Application 207 | 208 | #### Option 1: Using the Interactive Launcher Script 209 | 210 | ```bash 211 | chmod +x start.sh 212 | ./start.sh 213 | ``` 214 | 215 | The launcher offers the following options: 216 | 1. Start both the FastAPI backend and Streamlit frontend with Docker Compose 217 | 2. Start only the FastAPI backend 218 | 3. 
Start only the Streamlit frontend (with Docker or locally) 219 | 220 | #### Option 2: Using Docker Compose 221 | 222 | Start all services: 223 | ```bash 224 | docker-compose up --build 225 | ``` 226 | 227 | Start only specific services: 228 | ```bash 229 | docker-compose up --build app # Backend only 230 | docker-compose up --build streamlit # Frontend only 231 | ``` 232 | 233 | #### Option 3: Running Locally (Development) 234 | 235 | 1. Create and activate a virtual environment: 236 | ```bash 237 | uv venv 238 | source .venv/bin/activate # Linux/macOS 239 | .venv\Scripts\activate # Windows 240 | ``` 241 | 242 | 2. Install dependencies: 243 | ```bash 244 | uv pip install -e . 245 | ``` 246 | 247 | 3. Start the FastAPI backend: 248 | ```bash 249 | uvicorn src.main:app --reload --port 8000 250 | ``` 251 | 252 | 4. Start the Streamlit frontend (in a separate terminal): 253 | ```bash 254 | cd streamlit 255 | ./run.sh # or `streamlit run app.py` 256 | ``` 257 | 258 | ### Accessing the Application 259 | 260 | - **Streamlit Frontend**: http://localhost:8501 261 | - **FastAPI Swagger Docs**: http://localhost:8000/docs 262 | - **API Base URL**: http://localhost:8000/api/v1 263 | 264 | ## 📋 Usage Guide 265 | 266 | ### Document Upload 267 | 268 | 1. Navigate to the Streamlit web interface 269 | 2. Click on the "Upload Documents" section in the sidebar 270 | 3. Select a PDF file (limit: 200MB per file) 271 | 4. Click "Process Document" 272 | 5. Wait for the processing to complete (progress will be displayed) 273 | 274 | ### Creating a Conversation 275 | 276 | 1. Click "New Conversation" in the sidebar 277 | 2. A new conversation will be created with a temporary title 278 | 3. The title will be automatically updated based on your first message 279 | 280 | ### Chatting with Your Documents 281 | 282 | 1. Type your question in the chat input 283 | 2. 
The system will: 284 | - Retrieve relevant context from your documents 285 | - Consider your conversation history 286 | - Generate a comprehensive answer 287 | 3. Continue the conversation with follow-up questions 288 | 289 | ### Managing Conversations 290 | 291 | - All your conversations are saved and accessible from the sidebar 292 | - Select any conversation to continue where you left off 293 | - Conversation history is preserved between sessions 294 | 295 | ## 🔧 API Endpoints 296 | 297 | The system exposes the following key API endpoints: 298 | 299 | ### Documents API 300 | 301 | - `POST /api/v1/documents/upload`: Upload a PDF document 302 | - `GET /api/v1/documents/task/{task_id}`: Check document processing status 303 | 304 | ### Chat API 305 | 306 | - `PUT /api/v1/chat/conversation`: Create a new conversation 307 | - `GET /api/v1/chat/conversations`: List all conversations 308 | - `GET /api/v1/chat/conversations/{conversation_id}`: Get a specific conversation 309 | - `DELETE /api/v1/chat/conversations/{conversation_id}`: Delete a conversation 310 | - `POST /api/v1/chat/{conversation_id}`: Send a message in a conversation 311 | 312 | ## 📁 Project Structure 313 | 314 | ``` 315 | . 
316 | ├── docker/ # Docker configuration files 317 | │ ├── app/ # Backend Docker setup 318 | │ └── streamlit/ # Frontend Docker setup 319 | ├── logs/ # Application logs 320 | ├── src/ # Backend source code 321 | │ ├── router/ # API route definitions 322 | │ │ ├── chat.py # Chat endpoints 323 | │ │ └── upload.py # Document upload endpoints 324 | │ ├── utils/ # Utility modules 325 | │ │ ├── llm.py # LLM integration 326 | │ │ ├── pdf_processor.py # PDF processing 327 | │ │ ├── text_chunking.py # Text chunking 328 | │ │ └── vector_store.py # Vector database interface 329 | │ ├── main.py # FastAPI application entry 330 | │ └── settings.py # Application settings 331 | ├── streamlit/ # Streamlit frontend 332 | │ ├── app.py # Main Streamlit application 333 | │ └── utils.py # Frontend utilities 334 | ├── tests/ # Test suite 335 | │ ├── unit/ # Unit tests 336 | │ └── integration/ # Integration tests 337 | ├── uploads/ # Uploaded documents storage 338 | ├── .env.example # Example environment variables 339 | ├── docker-compose.yml # Docker Compose configuration 340 | ├── Dockerfile # Backend Dockerfile 341 | ├── pyproject.toml # Python project configuration 342 | ├── start.sh # Interactive launcher script 343 | └── README.md # Project documentation 344 | 345 | ## 🛠️ Configuration Options 346 | 347 | The system can be configured through environment variables: 348 | 349 | | Variable | Description | Default | 350 | |----------|-------------|---------| 351 | | `GROQ_API_KEY` | Groq API key for LLM integration | - | 352 | | `MONGODB_URL` | MongoDB connection string | mongodb://localhost:27017 | 353 | | `MONGODB_DB_NAME` | MongoDB database name | rag_system | 354 | | `QDRANT_URL` | Qdrant server URL | http://localhost:6333 | 355 | | `MAX_WORKERS` | Maximum worker threads for PDF processing | 4 | 356 | | `CHUNK_SIZE` | Target chunk size for document splitting | 512 | 357 | | `CHUNK_OVERLAP` | Overlap between consecutive chunks | 50 | 358 | | `TOP_K` | Number of chunks to
retrieve per query | 5 | 360 | | `RAG_THRESHOLD` | Similarity threshold for relevance | 0.75 | 361 | | `TEMPERATURE` | LLM temperature setting | 0.7 | 362 | | `N_LAST_MESSAGE` | Number of previous messages to include | 5 | 363 | 364 | ## 🤝 Contributing 365 | 366 | Contributions are welcome! Here's how you can help: 367 | 368 | 1. Fork the repository 369 | 2. Create a feature branch: `git checkout -b feature/amazing-feature` 370 | 3. Commit your changes: `git commit -m 'Add amazing feature'` 371 | 4. Push to the branch: `git push origin feature/amazing-feature` 372 | 5. Open a pull request 373 | 374 | Please ensure your code follows our style guidelines and includes appropriate tests. 375 | 376 | ## 📝 License 377 | 378 | This project is licensed under the Apache License 2.0 - see the LICENSE file for details. 379 | 380 | ## 📧 Contact 381 | 382 | Project Link: [https://github.com/NnA301023/cygen](https://github.com/NnA301023/cygen) 383 | 384 | ## 🌐 Connect With Us 385 | 386 | ### Our Platforms 387 | - **Magazine**: [ITSec Buzz](https://itsec.buzz/) 388 | - **Engineering Space**: [ITSec Asia Tech](https://www.itsecasia.tech/) 389 | 390 | ### Social Media 391 | - [Instagram](https://www.instagram.com/rndforge.official/) 392 | - [TikTok](https://www.tiktok.com/@rndforge) 393 | - [Threads](https://www.threads.net/@rndforge.official) 394 | - [YouTube](https://www.youtube.com/@rndforgeofficial) 395 | 396 | --- 397 | 398 |
399 |

Built with ❤️ by RnD Team

400 |
-------------------------------------------------------------------------------- /assets/cygen logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITSEC-Research/cygen/12682278a30ca52cd5e159907a49b42540ed6486/assets/cygen logo.png -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | app: 3 | build: . 4 | ports: 5 | - "8000:8000" 6 | volumes: 7 | - .:/app 8 | - ./uploads:/app/uploads 9 | - ./logs:/app/logs 10 | environment: 11 | - ENVIRONMENT=development # Change to 'production' for production mode 12 | - MAX_WORKERS=4 13 | - PYTHONPATH=/app 14 | env_file: 15 | - .env 16 | depends_on: 17 | - mongodb 18 | - qdrant 19 | develop: 20 | watch: 21 | - path: ./service 22 | target: /app/service 23 | action: sync 24 | 25 | streamlit: 26 | build: 27 | context: . 28 | dockerfile: streamlit/Dockerfile 29 | ports: 30 | - "8501:8501" 31 | volumes: 32 | - ./streamlit:/app/streamlit 33 | - ./uploads:/app/uploads 34 | - ./logs:/app/logs 35 | environment: 36 | - API_URL=http://app:8000 37 | - WS_URL=ws://app:8000 38 | - PYTHONPATH=/app 39 | - STREAMLIT_SERVER_PORT=8501 40 | - STREAMLIT_SERVER_HEADLESS=true 41 | - STREAMLIT_BROWSER_GATHER_USAGE_STATS=false 42 | - MONGODB_URL=mongodb://mongodb:27017 43 | env_file: 44 | - .env 45 | depends_on: 46 | - app 47 | restart: unless-stopped 48 | healthcheck: 49 | test: ["CMD", "curl", "--fail", "http://localhost:8501/_stcore/health"] 50 | interval: 30s 51 | timeout: 10s 52 | retries: 3 53 | start_period: 5s 54 | develop: 55 | watch: 56 | - path: ./streamlit 57 | target: /app/streamlit 58 | action: sync 59 | 60 | mongodb: 61 | image: mongo:latest 62 | ports: 63 | - "27017:27017" 64 | volumes: 65 | - mongodb_data:/data/db 66 | environment: 67 | - MONGO_INITDB_DATABASE=rag_system 68 | 69 | qdrant: 70 | image: qdrant/qdrant:latest 
71 | ports: 72 | - "6333:6333" 73 | - "6334:6334" 74 | volumes: 75 | - qdrant_data:/qdrant/storage 76 | environment: 77 | - QDRANT_API_KEY=${QDRANT_API_KEY} 78 | 79 | volumes: 80 | mongodb_data: 81 | qdrant_data: -------------------------------------------------------------------------------- /docker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Check runtime directory 5 | echo "Current directory contents:" 6 | ls -la 7 | 8 | # Activate virtual environment 9 | . .venv/bin/activate 10 | 11 | # Install dependencies in development mode 12 | if [ "$ENVIRONMENT" = "development" ]; then 13 | echo "Installing dependencies in development mode..." 14 | uv pip install -e . 15 | else 16 | echo "Installing dependencies in production mode..." 17 | uv pip install . 18 | fi 19 | 20 | # Run the application with hot reload in development 21 | if [ "$ENVIRONMENT" = "development" ]; then 22 | echo "Starting server in development mode with hot reload..." 23 | exec uvicorn src.main:app --host 0.0.0.0 --port 8000 --reload --reload-dir /app/src 24 | else 25 | echo "Starting server in production mode..." 
26 | exec uvicorn src.main:app --host 0.0.0.0 --port 8000 --workers $MAX_WORKERS 27 | fi -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "cygen" 3 | version = "0.1.0" 4 | description = "Advanced RAG System with Groq LLM" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | 9 | # FastAPI and Server 10 | "fastapi>=0.104.0", 11 | "uvicorn[standard]>=0.24.0", 12 | "websockets>=12.0", 13 | "python-multipart>=0.0.6", 14 | "pydantic>=2.5.0", 15 | "pydantic-settings>=2.1.0", 16 | 17 | # Database 18 | "motor>=3.3.0", 19 | "qdrant-client>=1.7.0", 20 | 21 | # PDF Processing 22 | "PyPDF2>=3.0.0", 23 | 24 | # LLM and Embeddings 25 | "groq>=0.4.0", 26 | "fastembed>=0.2.0", 27 | "langchain>=0.1.9", 28 | 29 | # Utilities 30 | "python-dotenv>=1.0.0", 31 | "tenacity>=8.2.3", 32 | "loguru>=0.7.2", 33 | "aiofiles>=23.2.1", 34 | 35 | # Streamlit UI 36 | "streamlit>=1.31.0", 37 | "streamlit-chat>=0.1.1", 38 | "httpx>=0.25.2", 39 | "watchdog>=3.0.0", 40 | "websocket-client>=1.8.0", 41 | ] 42 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, HTTPException 2 | from fastapi.middleware.cors import CORSMiddleware 3 | from fastapi.responses import JSONResponse 4 | from contextlib import asynccontextmanager 5 | from loguru import logger 6 | import asyncio 7 | 8 | from .settings import get_settings 9 | from .router import upload, chat 10 | 11 | # Load settings 12 | settings = get_settings() 13 | 14 | # Configure logger 15 | logger.add( 16 | "logs/app.log", 17 | rotation="500 MB", 18 | retention="10 days", 19 | level="INFO" 20 | ) 21 | 22 | @asynccontextmanager 23 | async def lifespan(app: FastAPI): 24 | """ 25 | Lifespan context manager for the FastAPI 
application. 26 | Handles startup and shutdown events. 27 | """ 28 | 29 | # Service Startup... 30 | app.state.max_workers = settings.MAX_WORKERS 31 | app.state.processing_semaphore = asyncio.Semaphore(settings.MAX_WORKERS) 32 | logger.info(f"Server starting with {settings.MAX_WORKERS} workers") 33 | 34 | yield # Server is running 35 | 36 | # Service Shutdown 37 | logger.info("Server shutting down") 38 | 39 | # Initialize FastAPI app 40 | app = FastAPI( 41 | title=settings.APP_NAME, 42 | description="Advanced RAG System with Groq LLM", 43 | version="1.0.0", 44 | docs_url="/docs", 45 | redoc_url="/redoc", 46 | lifespan=lifespan 47 | ) 48 | 49 | # Configure CORS 50 | app.add_middleware( 51 | CORSMiddleware, 52 | allow_origins=["*"], 53 | allow_credentials=True, 54 | allow_methods=["*"], 55 | allow_headers=["*"], 56 | ) 57 | 58 | # Include routers 59 | app.include_router( 60 | upload.router, 61 | prefix=f"{settings.API_V1_PREFIX}/documents", 62 | tags=["Document Processing"] 63 | ) 64 | 65 | app.include_router( 66 | chat.router, 67 | prefix=f"{settings.API_V1_PREFIX}/chat", 68 | tags=["Chat"] 69 | ) 70 | 71 | # Health check endpoint 72 | @app.get("/", include_in_schema=False) 73 | async def root_handler(): 74 | """Health check endpoint.""" 75 | return { 76 | "status": "healthy", 77 | "workers": settings.MAX_WORKERS, 78 | "version": "1.0.0" 79 | } 80 | 81 | # Health check endpoint 82 | @app.get("/health", include_in_schema=False) 83 | async def health_check(): 84 | """Health check endpoint.""" 85 | return {"status": "healthy"} 86 | 87 | # Error handlers 88 | @app.exception_handler(HTTPException) 89 | async def http_exception_handler(request, exc): 90 | """Handle HTTP exceptions.""" 91 | return JSONResponse( 92 | status_code=exc.status_code, 93 | content={"detail": exc.detail} 94 | ) 95 | 96 | @app.exception_handler(Exception) 97 | async def general_exception_handler(request, exc): 98 | """Handle general exceptions.""" 99 | logger.exception("Unhandled exception") 100 | 
return JSONResponse( 101 | status_code=500, 102 | content={"detail": "Internal server error"} 103 | ) -------------------------------------------------------------------------------- /src/router/__init__.py: -------------------------------------------------------------------------------- 1 | """Router package for the FastAPI application.""" 2 | -------------------------------------------------------------------------------- /src/router/chat.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, List 2 | import json 3 | import uuid 4 | import traceback 5 | from datetime import datetime 6 | 7 | from fastapi import APIRouter, WebSocket, WebSocketDisconnect, HTTPException 8 | from pydantic import BaseModel 9 | from loguru import logger 10 | from motor.motor_asyncio import AsyncIOMotorClient 11 | 12 | from ..settings import get_settings 13 | from ..utils.vector_store import VectorStore 14 | from ..utils.llm import GroqLLM 15 | 16 | settings = get_settings() 17 | 18 | router = APIRouter() 19 | 20 | # Initialize services 21 | vector_store = VectorStore() 22 | llm = GroqLLM() 23 | 24 | class ChatMessage(BaseModel): 25 | """Chat message model.""" 26 | role: str 27 | content: str 28 | timestamp: datetime = None 29 | feedback: Dict[str, Any] = { 30 | "thumbs": None, # "up" or "down" 31 | "comment": None, # Optional feedback comment 32 | "submitted_at": None # Timestamp when feedback was submitted 33 | } 34 | 35 | def to_dict(self) -> Dict[str, Any]: 36 | """Convert message to dictionary with ISO formatted timestamp.""" 37 | data = self.model_dump() 38 | if self.timestamp: 39 | data["timestamp"] = self.timestamp.isoformat() 40 | if self.feedback and self.feedback.get("submitted_at"): 41 | data["feedback"]["submitted_at"] = self.feedback["submitted_at"].isoformat() 42 | return data 43 | 44 | class ConversationResponse(BaseModel): 45 | """Response model for conversation operations.""" 46 | id: str 47 | title: str 48 
| metadata: Dict[str, Any] = {} 49 | created_at: datetime 50 | updated_at: datetime = None 51 | message_count: int = 0 52 | last_message: ChatMessage | None = None 53 | 54 | class Conversation(BaseModel): 55 | """Conversation model.""" 56 | id: str 57 | title: str 58 | messages: List[ChatMessage] 59 | metadata: Dict[str, Any] = {} 60 | created_at: datetime = None 61 | updated_at: datetime = None 62 | 63 | class ConnectionManager: 64 | """Manages WebSocket connections.""" 65 | 66 | def __init__(self): 67 | self.active_connections: Dict[str, WebSocket] = {} 68 | self.mongo_client = AsyncIOMotorClient(settings.MONGODB_URL) 69 | self.db = self.mongo_client[settings.MONGODB_DB_NAME] 70 | 71 | async def connect(self, websocket: WebSocket, conversation_id: str): 72 | """Connect a new client.""" 73 | # Verify conversation exists 74 | conversation = await self.get_conversation_history(conversation_id) 75 | if not conversation: 76 | raise HTTPException(status_code=404, detail="Conversation not found") 77 | 78 | await websocket.accept() 79 | self.active_connections[conversation_id] = websocket 80 | logger.info(f"Client connected to conversation {conversation_id}") 81 | 82 | def disconnect(self, conversation_id: str): 83 | """Disconnect a client.""" 84 | if conversation_id in self.active_connections: 85 | del self.active_connections[conversation_id] 86 | logger.info(f"Client disconnected from conversation {conversation_id}") 87 | 88 | async def send_message(self, conversation_id: str, message: ChatMessage): 89 | """Send a message to a specific client.""" 90 | if conversation_id in self.active_connections: 91 | websocket = self.active_connections[conversation_id] 92 | await websocket.send_json(message.to_dict()) 93 | 94 | async def get_conversation_history(self, conversation_id: str) -> Conversation: 95 | """Get conversation history from MongoDB.""" 96 | conversation = await self.db.conversations.find_one({"id": conversation_id}) 97 | if conversation: 98 | return 
Conversation(**conversation) 99 | return None 100 | 101 | async def save_message(self, conversation_id: str, message: ChatMessage): 102 | """Save a message to conversation history.""" 103 | now = datetime.utcnow() 104 | message.timestamp = now 105 | 106 | # Update or create conversation 107 | await self.db.conversations.update_one( 108 | {"id": conversation_id}, 109 | { 110 | "$push": {"messages": message.to_dict()}, 111 | "$set": {"updated_at": now}, 112 | "$setOnInsert": { 113 | "id": conversation_id, 114 | "created_at": now, 115 | "metadata": {} 116 | } 117 | }, 118 | upsert=True 119 | ) 120 | 121 | async def update_title(self, conversation_id: str, title: str): 122 | """Update conversation title.""" 123 | await self.db.conversations.update_one( 124 | {"id": conversation_id}, 125 | {"$set": {"title": title}} 126 | ) 127 | logger.info(f"Updated title for conversation {conversation_id}: {title}") 128 | 129 | # Initialize connection manager 130 | manager = ConnectionManager() 131 | 132 | class ChatRequest(BaseModel): 133 | """Chat request model.""" 134 | message: str 135 | 136 | class ChatResponse(BaseModel): 137 | """Chat response model.""" 138 | role: str 139 | content: str 140 | timestamp: datetime = None 141 | 142 | class FeedbackRequest(BaseModel): 143 | """Feedback request model.""" 144 | thumbs: str # "up" or "down" 145 | comment: str | None = None 146 | 147 | @router.put("/conversation", response_model=ConversationResponse) 148 | async def create_conversation(): 149 | """Create a new conversation with temporary title.""" 150 | conversation_id = str(uuid.uuid4()) 151 | now = datetime.utcnow() 152 | 153 | # Use a temporary title that will be updated with the first message 154 | title = "New Conversation" 155 | 156 | # System metadata 157 | metadata = { 158 | "created_by": "system", 159 | "created_at_timestamp": now.timestamp(), 160 | "source": "api", 161 | "title_generated": False # Flag to track if title has been generated 162 | } 163 | 164 | 
conversation_data = { 165 | "id": conversation_id, 166 | "title": title, 167 | "metadata": metadata, 168 | "messages": [], 169 | "created_at": now, 170 | "updated_at": now 171 | } 172 | 173 | await manager.db.conversations.insert_one(conversation_data) 174 | logger.info(f"Created conversation {conversation_id}") 175 | 176 | return ConversationResponse( 177 | id=conversation_id, 178 | title=title, 179 | metadata=metadata, 180 | created_at=now, 181 | updated_at=now, 182 | message_count=0 183 | ) 184 | 185 | @router.get("/conversations", response_model=List[ConversationResponse]) 186 | async def list_conversations(skip: int = 0, limit: int = 10): 187 | """List all conversations.""" 188 | conversations = [] 189 | cursor = manager.db.conversations.find().sort("updated_at", -1).skip(skip).limit(limit) 190 | 191 | async for conv in cursor: 192 | last_message = None 193 | messages = conv.get("messages", []) 194 | if messages: 195 | last_message = ChatMessage(**messages[-1]) 196 | conversations.append(ConversationResponse( 197 | id=conv["id"], 198 | title=conv.get("title", "New Conversation"), 199 | metadata=conv.get("metadata", {}), 200 | created_at=conv["created_at"], 201 | updated_at=conv.get("updated_at"), 202 | message_count=len(messages), 203 | last_message=last_message 204 | )) 205 | 206 | return conversations 207 | 208 | @router.get("/conversations/{conversation_id}", response_model=Conversation) 209 | async def get_conversation(conversation_id: str): 210 | """Get a specific conversation.""" 211 | conversation = await manager.get_conversation_history(conversation_id) 212 | if not conversation: 213 | raise HTTPException(status_code=404, detail="Conversation not found") 214 | return conversation 215 | 216 | @router.delete("/conversations/{conversation_id}") 217 | async def delete_conversation(conversation_id: str): 218 | """Delete a conversation.""" 219 | result = await manager.db.conversations.delete_one({"id": conversation_id}) 220 | if result.deleted_count == 0: 
221 | raise HTTPException(status_code=404, detail="Conversation not found") 222 | 223 | # Disconnect any active WebSocket connections 224 | manager.disconnect(conversation_id) 225 | return {"status": "success", "message": "Conversation deleted"} 226 | 227 | async def generate_title(message: str) -> str: 228 | """Generate a concise title from the first message using LLM.""" 229 | try: 230 | system_prompt = """You are a helpful assistant that generates concise conversation titles. 231 | Create a brief, descriptive title (maximum 6 words) based on the user's first message. 232 | The title should capture the main topic or intent. Respond with ONLY the title, no other text.""" 233 | 234 | response = await llm.chat_completion( 235 | messages=[ 236 | {"role": "system", "content": system_prompt}, 237 | {"role": "user", "content": f"Generate a title for this conversation that starts with: {message}"} 238 | ], 239 | temperature=settings.TEMPERATURE, 240 | max_tokens=25 241 | ) 242 | 243 | # Clean up the response 244 | title = response.strip("'").strip('"').strip() 245 | return title 246 | 247 | except Exception as e: 248 | traceback.print_exc() 249 | logger.error(f"Error generating title: {str(e)}") 250 | return "New Conversation" # Fallback title 251 | 252 | @router.websocket("/ws/{conversation_id}") 253 | async def chat_websocket(websocket: WebSocket, conversation_id: str): 254 | """WebSocket endpoint for chat.""" 255 | try: 256 | await manager.connect(websocket, conversation_id) 257 | 258 | # Load conversation history 259 | logger.info("Retrieve Conv. History") 260 | conversation = await manager.get_conversation_history(conversation_id) 261 | 262 | # NOTE: Buat apa? 
263 | # if conversation: 264 | # for message in conversation.messages: 265 | # await manager.save_message(conversation_id, message) 266 | 267 | while True: 268 | 269 | # Receive message from client 270 | data = await websocket.receive_json() 271 | user_message = ChatMessage( 272 | role="user", 273 | content=data["message"] 274 | ) 275 | 276 | # Save user message 277 | await manager.save_message(conversation_id, user_message) 278 | 279 | try: 280 | # Generate title if this is the first message 281 | logger.info("Generate Title...") 282 | if not conversation or (not conversation.messages and not conversation.metadata.get("title_generated")): 283 | title = await generate_title(user_message.content) 284 | await manager.update_title(conversation_id, title) 285 | await manager.db.conversations.update_one( 286 | {"id": conversation_id}, 287 | {"$set": {"metadata.title_generated": True}} 288 | ) 289 | 290 | # Get relevant context from vector store 291 | logger.info("Retrieve Relevant Context...") 292 | context = await vector_store.similarity_search( 293 | query=user_message.content, 294 | k=settings.TOP_K 295 | ) 296 | 297 | # Determine if this is a basic conversation or needs context 298 | logger.info("Determine Route Conversation (Basic / RAG)") 299 | is_basic_conversation = len(context) == 0 or all(c['score'] < settings.RAG_THRESHOLD for c in context) 300 | 301 | # Prepare conversation context 302 | conversation_context = ["Conversation History:"] 303 | if conversation: 304 | logger.info(conversation.messages) 305 | for message in conversation.messages[settings.N_LAST_MESSAGE:]: 306 | conversation_context.append(f"{message.role}: {message.content}") 307 | conversation_context = "\n".join(conversation_context) 308 | 309 | # Select appropriate system prompt based on query type 310 | if is_basic_conversation: 311 | system_prompt = """ 312 | Answer accoding user language, also consider conversation history if necessary to answer question. 
313 | You are a helpful and friendly AI assistant. 314 | Engage in natural conversation and provide accurate, concise responses. 315 | If the user mentions something vague or unclear, politely ask for clarification or context 316 | to ensure you provide the most relevant and helpful answer. 317 | If the user refers to specific documents or information, 318 | let them know you can search through the knowledge base to assist them. 319 | """ 320 | 321 | messages = [ 322 | {"role": "system", "content": system_prompt}, 323 | {"role": "user", "content": conversation_context}, 324 | {"role": "user", "content": user_message.content} 325 | ] 326 | else: 327 | system_prompt = """ 328 | Answer according to user language, also consider conversation history if necessary to answer question. 329 | You are a helpful AI assistant with access to a knowledge base of documents. 330 | Use the provided context to answer questions accurately and comprehensively. 331 | 332 | For each response: 333 | 1. Analyze the provided context and cite specific sources using page numbers 334 | 2. Structure your response to clearly separate information from different sources 335 | 3. When citing information, use the format: [Source: filename, Page: X] 336 | 4. If multiple sources support a point, cite all relevant sources 337 | 5. If the context doesn't fully address the question, clearly state what information is from the sources and what is general knowledge 338 | 339 | Always maintain accuracy over completeness. If you're unsure about something, acknowledge your uncertainty and explain what evidence you do have from the sources. 
340 | 341 | Remember to: 342 | - Provide page numbers for all cited information 343 | - Distinguish between direct quotes and paraphrased content 344 | - Note any conflicting information between sources 345 | - Be transparent about gaps in the provided context 346 | """ 347 | context_knowledge = [f"{cont['text']}\nSource: {cont['file_path']} - Page Number: {cont['page_number']}" for cont in context] 348 | context_knowledge = "\n".join(context_knowledge) 349 | messages = [ 350 | {"role": "system", "content": system_prompt}, 351 | {"role": "user", "content": conversation_context}, 352 | { 353 | "role": "user", 354 | "content": f""" 355 | Context: {context_knowledge} 356 | Question: {user_message.content} 357 | """} 358 | ] 359 | 360 | # Generate response using LLM 361 | logger.info("LLM Generate Response...") 362 | logger.info(f"Message Throw: {messages}") 363 | response = await llm.chat_completion( 364 | messages=messages, 365 | temperature=settings.TEMPERATURE 366 | ) 367 | 368 | # Create assistant message 369 | assistant_message = ChatMessage( 370 | role="assistant", 371 | content=response 372 | ) 373 | logger.info(f"Generated response for {'basic' if is_basic_conversation else 'context-based'} query") 374 | logger.info(assistant_message.to_dict()) 375 | 376 | # Save assistant message 377 | await manager.save_message(conversation_id, assistant_message) 378 | 379 | # Send response to client 380 | await manager.send_message(conversation_id, assistant_message) 381 | 382 | except Exception as e: 383 | traceback.print_exc() 384 | logger.error(f"Error processing message: {str(e)}") 385 | error_message = ChatMessage( 386 | role="system", 387 | content="I apologize, but I encountered an error processing your message." 
388 | ) 389 | await manager.send_message(conversation_id, error_message) 390 | 391 | except WebSocketDisconnect: 392 | traceback.print_exc() 393 | manager.disconnect(conversation_id) 394 | 395 | except Exception as e: 396 | traceback.print_exc() 397 | logger.error(f"WebSocket error: {str(e)}") 398 | manager.disconnect(conversation_id) 399 | 400 | @router.post("/{conversation_id}", response_model=ChatResponse) 401 | async def chat_post(conversation_id: str, request: ChatRequest): 402 | """POST endpoint for chat - mirrors WebSocket functionality.""" 403 | try: 404 | # Verify conversation exists 405 | conversation = await manager.get_conversation_history(conversation_id) 406 | if not conversation: 407 | raise HTTPException(status_code=404, detail="Conversation not found") 408 | 409 | # Create user message 410 | user_message = ChatMessage( 411 | role="user", 412 | content=request.message 413 | ) 414 | 415 | # Save user message 416 | await manager.save_message(conversation_id, user_message) 417 | 418 | try: 419 | # Generate title if this is the first message 420 | logger.info("Generate Title...") 421 | if not conversation or (not conversation.messages and not conversation.metadata.get("title_generated")): 422 | title = await generate_title(user_message.content) 423 | await manager.update_title(conversation_id, title) 424 | await manager.db.conversations.update_one( 425 | {"id": conversation_id}, 426 | {"$set": {"metadata.title_generated": True}} 427 | ) 428 | 429 | # Get relevant context from vector store 430 | logger.info("Retrieve Relevant Context...") 431 | context = await vector_store.similarity_search( 432 | query=user_message.content, 433 | k=settings.TOP_K 434 | ) 435 | 436 | # Determine if this is a basic conversation or needs context 437 | logger.info("Determine Route Conversation (Basic / RAG)") 438 | is_basic_conversation = len(context) == 0 or all(c['score'] < settings.RAG_THRESHOLD for c in context) 439 | 440 | # Prepare conversation context 441 | 
conversation_context = ["Conversation History:"] 442 | if conversation: 443 | for message in conversation.messages[settings.N_LAST_MESSAGE:]: 444 | conversation_context.append(f"{message.role}: {message.content}") 445 | conversation_context = "\n".join(conversation_context) 446 | 447 | # Select appropriate system prompt based on query type 448 | if is_basic_conversation: 449 | system_prompt = """ 450 | Answer accoding user language, also consider conversation history if necessary to answer question. 451 | You are a helpful and friendly AI assistant. 452 | Engage in natural conversation and provide accurate, concise responses. 453 | If the user mentions something vague or unclear, politely ask for clarification or context 454 | to ensure you provide the most relevant and helpful answer. 455 | If the user refers to specific documents or information, 456 | let them know you can search through the knowledge base to assist them. 457 | """ 458 | 459 | messages = [ 460 | {"role": "system", "content": system_prompt}, 461 | {"role": "user", "content": conversation_context}, 462 | {"role": "user", "content": user_message.content} 463 | ] 464 | else: 465 | system_prompt = """ 466 | Answer accoding user language, also consider conversation history if necessary to answer question. 467 | You are a helpful AI assistant with access to a knowledge base of documents. 468 | Use the provided context to answer questions accurately and comprehensively. 469 | If the context doesn't fully address the question, acknowledge what you know from the context 470 | and what you're unsure about. Always maintain accuracy over completeness. 
471 | """ 472 | 473 | messages = [ 474 | {"role": "system", "content": system_prompt}, 475 | {"role": "user", "content": conversation_context}, 476 | {"role": "user", "content": f"""Context: {json.dumps([c['text'] for c in context])} 477 | Question: {user_message.content}"""} 478 | ] 479 | 480 | # Generate response using LLM 481 | logger.info("LLM Generate Response...") 482 | logger.info(f"Message Throw: {messages}") 483 | response = await llm.chat_completion( 484 | messages=messages, 485 | temperature=settings.TEMPERATURE 486 | ) 487 | 488 | # Create assistant message 489 | assistant_message = ChatMessage( 490 | role="assistant", 491 | content=response 492 | ) 493 | logger.info(f"Generated response for {'basic' if is_basic_conversation else 'context-based'} query") 494 | logger.info(assistant_message.to_dict()) 495 | 496 | # Save assistant message 497 | await manager.save_message(conversation_id, assistant_message) 498 | 499 | # Return the response 500 | return ChatResponse( 501 | role=assistant_message.role, 502 | content=assistant_message.content, 503 | timestamp=assistant_message.timestamp 504 | ) 505 | 506 | except Exception as e: 507 | traceback.print_exc() 508 | logger.error(f"Error processing message: {str(e)}") 509 | raise HTTPException( 510 | status_code=500, 511 | detail="An error occurred while processing your message" 512 | ) 513 | 514 | except HTTPException: 515 | raise 516 | except Exception as e: 517 | traceback.print_exc() 518 | logger.error(f"Chat error: {str(e)}") 519 | raise HTTPException( 520 | status_code=500, 521 | detail="An unexpected error occurred" 522 | ) 523 | 524 | @router.post("/{conversation_id}/messages/{message_index}/feedback") 525 | async def submit_feedback(conversation_id: str, message_index: int, feedback: FeedbackRequest): 526 | """Submit feedback for a specific message in a conversation.""" 527 | try: 528 | # Get conversation 529 | conversation = await manager.get_conversation_history(conversation_id) 530 | if not 
conversation: 531 | raise HTTPException(status_code=404, detail="Conversation not found") 532 | 533 | # Validate message index 534 | if message_index < 0 or message_index >= len(conversation.messages): 535 | raise HTTPException(status_code=404, detail="Message not found") 536 | 537 | # Update feedback in the message 538 | now = datetime.utcnow() 539 | await manager.db.conversations.update_one( 540 | {"id": conversation_id}, 541 | {"$set": { 542 | f"messages.{message_index}.feedback": { 543 | "thumbs": feedback.thumbs, 544 | "comment": feedback.comment, 545 | "submitted_at": now 546 | } 547 | }} 548 | ) 549 | 550 | return {"status": "success", "message": "Feedback submitted successfully"} 551 | 552 | except HTTPException: 553 | raise 554 | except Exception as e: 555 | logger.error(f"Error submitting feedback: {str(e)}") 556 | raise HTTPException(status_code=500, detail="Failed to submit feedback") -------------------------------------------------------------------------------- /src/router/upload.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import uuid 3 | import traceback 4 | from typing import List 5 | from datetime import datetime 6 | 7 | from fastapi import APIRouter, UploadFile, File, BackgroundTasks, HTTPException 8 | from pydantic import BaseModel 9 | import aiofiles 10 | from loguru import logger 11 | 12 | from ..settings import get_settings 13 | from ..utils.background_tasks import BackgroundTaskManager 14 | 15 | settings = get_settings() 16 | router = APIRouter() 17 | task_manager = BackgroundTaskManager() 18 | 19 | # Ensure upload directory exists 20 | upload_dir = Path(settings.PDF_UPLOAD_DIR) 21 | upload_dir.mkdir(exist_ok=True) 22 | 23 | class TaskResponse(BaseModel): 24 | """Response model for upload tasks.""" 25 | task_id: str 26 | file_name: str 27 | status: str 28 | created_at: datetime 29 | 30 | @router.post("/upload", response_model=TaskResponse) 31 | async def 
upload_single_file( 32 | file: UploadFile = File(...), 33 | background_tasks: BackgroundTasks = None 34 | ): 35 | """ 36 | Upload a single PDF file for processing. 37 | 38 | Args: 39 | file: PDF file to upload 40 | background_tasks: FastAPI background tasks 41 | 42 | Returns: 43 | TaskResponse: Task information 44 | """ 45 | try: 46 | # Validate file type 47 | if not file.filename.lower().endswith('.pdf'): 48 | raise HTTPException( 49 | status_code=400, 50 | detail="Only PDF files are allowed" 51 | ) 52 | 53 | # Generate unique filename 54 | file_id = str(uuid.uuid4()) 55 | safe_filename = f"{file_id}_{file.filename}" 56 | file_path = upload_dir / safe_filename 57 | 58 | # Save file 59 | async with aiofiles.open(file_path, 'wb') as f: 60 | content = await file.read() 61 | await f.write(content) 62 | 63 | # Create task 64 | task_id = str(uuid.uuid4()) 65 | task_data = { 66 | "task_id": task_id, 67 | "file_name": file.filename, 68 | "file_path": str(file_path), 69 | "status": "pending", 70 | "created_at": datetime.utcnow() 71 | } 72 | 73 | # Store task in MongoDB 74 | await task_manager.mongo_client[settings.MONGODB_DB_NAME].tasks.insert_one(task_data) 75 | 76 | # Start processing in background 77 | background_tasks.add_task( 78 | task_manager.process_pdf_task, 79 | str(file_path), 80 | task_id 81 | ) 82 | 83 | logger.info(f"Started processing task {task_id} for file {file.filename}") 84 | return TaskResponse(**task_data) 85 | 86 | except Exception as e: 87 | traceback.print_exc() 88 | logger.error(f"Error processing upload: {str(e)}") 89 | raise HTTPException( 90 | status_code=500, 91 | detail="Error processing upload" 92 | ) 93 | 94 | @router.post("/upload/batch", response_model=List[TaskResponse]) 95 | async def upload_multiple_files( 96 | files: List[UploadFile] = File(...), 97 | background_tasks: BackgroundTasks = None 98 | ): 99 | """ 100 | Upload multiple PDF files for processing. 
101 | 102 | Args: 103 | files: List of PDF files to upload 104 | background_tasks: FastAPI background tasks 105 | 106 | Returns: 107 | List[TaskResponse]: List of task information 108 | """ 109 | responses = [] 110 | for file in files: 111 | try: 112 | response = await upload_single_file(file, background_tasks) 113 | responses.append(response) 114 | except HTTPException as e: 115 | logger.warning(f"Skipping file {file.filename}: {str(e)}") 116 | continue 117 | 118 | if not responses: 119 | traceback.print_exc() 120 | raise HTTPException( 121 | status_code=400, 122 | detail="No valid files were uploaded" 123 | ) 124 | 125 | return responses 126 | 127 | @router.get("/task/{task_id}", response_model=TaskResponse) 128 | async def get_task_status(task_id: str): 129 | """ 130 | Get the status of a processing task. 131 | 132 | Args: 133 | task_id: Task identifier 134 | 135 | Returns: 136 | TaskResponse: Task information 137 | """ 138 | task = await task_manager.mongo_client[settings.MONGODB_DB_NAME].tasks.find_one( 139 | {"task_id": task_id} 140 | ) 141 | 142 | if not task: 143 | raise HTTPException( 144 | status_code=404, 145 | detail="Task not found" 146 | ) 147 | 148 | return TaskResponse(**task) -------------------------------------------------------------------------------- /src/settings.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from pydantic_settings import BaseSettings 3 | from functools import lru_cache 4 | 5 | 6 | class Settings(BaseSettings): 7 | """ 8 | Application settings. 
9 | 10 | Attributes: 11 | # Server Settings 12 | APP_NAME: Name of the application 13 | DEBUG: Debug mode flag 14 | API_V1_PREFIX: API version 1 prefix 15 | 16 | # Processing Settings 17 | MAX_WORKERS: Maximum number of worker threads for background tasks 18 | CHUNK_SIZE: Size of text chunks for document processing (in tokens) 19 | CHUNK_OVERLAP: Overlap between chunks (in tokens) 20 | 21 | # Database Settings 22 | MONGODB_URL: MongoDB connection URL 23 | MONGODB_DB_NAME: MongoDB database name 24 | 25 | # Vector Store Settings 26 | QDRANT_URL: Qdrant server URL 27 | QDRANT_API_KEY: Qdrant API key 28 | COLLECTION_NAME: Name of the vector collection 29 | 30 | # LLM Settings 31 | GROQ_API_KEY: Groq API key 32 | MODEL_NAME: Name of the Groq model to use 33 | MAX_CONTEXT_LENGTH: Maximum context length for the model 34 | TEMPERATURE: Temperature for LLM responses 35 | 36 | # PDF Processing 37 | OCR_ENABLED: Whether to enable OCR for images in PDFs 38 | PDF_UPLOAD_DIR: Directory to store uploaded PDFs 39 | """ 40 | 41 | # Server Settings 42 | APP_NAME: str = "Advanced RAG System" 43 | DEBUG: bool = False 44 | API_V1_PREFIX: str = "/api/v1" 45 | 46 | # Processing Settings 47 | MAX_WORKERS: int = 4 48 | CHUNK_SIZE: int = 512 49 | CHUNK_OVERLAP: int = 50 50 | EMBEDDING_LENGTH: int = 768 51 | 52 | # Database Settings 53 | MONGODB_URL: str 54 | MONGODB_DB_NAME: str = "rag_system" 55 | 56 | # Vector Store Settings 57 | QDRANT_URL: str 58 | QDRANT_API_KEY: Optional[str] = None 59 | COLLECTION_NAME: str = "documents" 60 | 61 | # LLM Settings 62 | TOP_K: int = 5 # 25 63 | TOP_K_RERANKER: int = 10 64 | N_LAST_MESSAGE: int = -5 65 | RAG_THRESHOLD: float = 0.6 66 | GROQ_API_KEY: str 67 | MODEL_NAME: str = "mixtral-8x7b-32768" # Groq's Mixtral model 68 | MAX_CONTEXT_LENGTH: int = 8192 # 8k context window 69 | TEMPERATURE: float = 0.7 70 | 71 | # PDF Processing 72 | OCR_ENABLED: bool = True 73 | PDF_UPLOAD_DIR: str = "uploads" 74 | 75 | class Config: 76 | env_file = ".env" 77 | 
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """
    Return the process-wide Settings instance.

    The instance is built once (reading the environment / .env file) and
    memoized, so repeated calls are cheap and share configuration.

    Returns:
        Settings: The cached application settings.
    """
    return Settings()
Task queuing and concurrency control 28 | """ 29 | 30 | def __init__(self): 31 | """Initialize the background task manager.""" 32 | self.pdf_processor = PDFProcessor() 33 | self.mongo_client = AsyncIOMotorClient(settings.MONGODB_URL) 34 | self.qdrant_client = QdrantClient( 35 | url=settings.QDRANT_URL, 36 | api_key=settings.QDRANT_API_KEY 37 | ) 38 | self.vector_store = VectorStore() 39 | self.task_queue = deque() 40 | self.processing_semaphore = asyncio.Semaphore(settings.MAX_WORKERS) 41 | self.queue_processor_task = None 42 | 43 | # Ensure vector collection exists 44 | self._init_vector_collection() 45 | logger.info("Initialized BackgroundTaskManager") 46 | 47 | def _init_vector_collection(self): 48 | """Initialize the vector collection in Qdrant if it doesn't exist.""" 49 | try: 50 | collections = self.qdrant_client.get_collections().collections 51 | collection_exists = any(c.name == settings.COLLECTION_NAME for c in collections) 52 | 53 | if collection_exists: 54 | collection_info = self.qdrant_client.get_collection(settings.COLLECTION_NAME) 55 | if collection_info.config.params.model_dump()["vectors"]["size"] != settings.EMBEDDING_LENGTH: 56 | self.qdrant_client.delete_collection(collection_name=settings.COLLECTION_NAME) 57 | self.qdrant_client.create_collection( 58 | collection_name=settings.COLLECTION_NAME, 59 | vectors_config=VectorParams( 60 | size=settings.EMBEDDING_LENGTH, 61 | distance=Distance.COSINE 62 | ) 63 | ) 64 | else: 65 | self.qdrant_client.create_collection( 66 | collection_name=settings.COLLECTION_NAME, 67 | vectors_config=VectorParams( 68 | size=settings.EMBEDDING_LENGTH, 69 | distance=Distance.COSINE 70 | ) 71 | ) 72 | logger.info(f"Created vector collection: {settings.COLLECTION_NAME}") 73 | except Exception as e: 74 | logger.error(f"Error initializing vector collection: {str(e)}") 75 | raise 76 | 77 | async def process_pdf_task(self, file_path: str, task_id: str): 78 | """ 79 | Process a PDF file in the background. 
80 | 81 | Args: 82 | file_path: Path to the PDF file 83 | task_id: Unique identifier for the task 84 | """ 85 | # Add task to queue 86 | self.task_queue.append((file_path, task_id)) 87 | 88 | # Start queue processor if not running 89 | if self.queue_processor_task is None or self.queue_processor_task.done(): 90 | self.queue_processor_task = asyncio.create_task(self._process_queue()) 91 | 92 | logger.info(f"Added task {task_id} to queue for file {file_path}") 93 | 94 | async def _process_queue(self): 95 | """Process tasks from the queue with concurrency control.""" 96 | while self.task_queue: 97 | async with self.processing_semaphore: 98 | try: 99 | file_path, task_id = self.task_queue.popleft() 100 | await self._process_single_task(file_path, task_id) 101 | except Exception as e: 102 | logger.error(f"Error processing task from queue: {str(e)}") 103 | 104 | async def _process_single_task(self, file_path: str, task_id: str): 105 | """Process a single PDF task.""" 106 | try: 107 | # Update task status 108 | await self._update_task_status(task_id, "processing") 109 | 110 | # Process PDF 111 | documents = await self.pdf_processor.process_pdf(file_path) 112 | 113 | # Export HTML versions 114 | output_dir = Path(settings.PDF_UPLOAD_DIR) / "html" 115 | output_dir.mkdir(exist_ok=True) 116 | 117 | html_paths = [] 118 | chunk_ids = [] 119 | 120 | for document in tqdm(documents, desc="Processing Document"): 121 | # Extract text and metadata 122 | content = document["text"] 123 | metadata = {k: v for k, v in document.items() if k != "text"} 124 | 125 | # Store in vector store 126 | ids = await self.vector_store.add_texts( 127 | texts=[content], 128 | metadatas=[metadata] 129 | ) 130 | chunk_ids.extend(ids) 131 | 132 | # Store results 133 | result = { 134 | "status": "completed", 135 | "file_path": file_path, 136 | "html_paths": html_paths, 137 | "chunk_ids": chunk_ids, 138 | "num_pages": len(documents), 139 | "num_chunks": len(chunk_ids), 140 | "completed_at": datetime.utcnow() 
class GroqLLM:
    """
    Utility class for interacting with Groq's LLM API.

    Handles blocking and streaming chat completions with a rough
    context-window budget so requests never ask for an invalid
    (zero or negative) number of response tokens.
    """

    # Default cap on response length when the caller gives no max_tokens.
    DEFAULT_MAX_RESPONSE_TOKENS = 2048

    def __init__(self):
        """Initialize the async Groq client from application settings."""
        self.client = groq.AsyncGroq(api_key=settings.GROQ_API_KEY)
        self.model = settings.MODEL_NAME
        logger.info(f"Initialized GroqLLM with model: {self.model}")

    def _budget_max_tokens(self, messages: List[Dict[str, str]], max_tokens: int) -> int:
        """
        Return a safe max_tokens value for the given messages.

        Uses the ~4 chars/token heuristic to estimate the prompt size.
        BUG FIX: the original could compute a zero or negative budget when
        the prompt estimate exceeded MAX_CONTEXT_LENGTH; clamp to >= 1 so
        the API call is always valid.
        """
        if max_tokens is not None:
            return max_tokens
        total_chars = sum(len(m["content"]) for m in messages)
        approx_tokens = total_chars // 4  # rough estimate: ~4 chars per token
        return max(
            1,
            min(
                settings.MAX_CONTEXT_LENGTH - approx_tokens,
                self.DEFAULT_MAX_RESPONSE_TOKENS,
            ),
        )

    def _effective_temperature(self, temperature: float) -> float:
        """
        Resolve the sampling temperature.

        BUG FIX: the original used `temperature or settings.TEMPERATURE`,
        which silently overrode an explicit temperature of 0.0 (deterministic
        sampling) with the configured default.
        """
        return settings.TEMPERATURE if temperature is None else temperature

    async def chat_completion(
        self,
        messages: List[Dict[str, str]],
        temperature: float = None,
        max_tokens: int = None
    ) -> str:
        """
        Generate a chat completion response.

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            temperature: Optional temperature override (0.0 is respected)
            max_tokens: Optional max tokens override

        Returns:
            str: Generated response text, stripped of surrounding whitespace

        Raises:
            Exception: Propagates any Groq API error after logging it.
        """
        try:
            completion = await self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=self._effective_temperature(temperature),
                max_tokens=self._budget_max_tokens(messages, max_tokens),
                stream=False  # streaming lives in stream_chat_completion
            )
            response = completion.choices[0].message.content
            return response.strip()
        except Exception as e:
            logger.error(f"Error generating chat completion: {str(e)}")
            raise

    async def stream_chat_completion(
        self,
        messages: List[Dict[str, str]],
        temperature: float = None,
        max_tokens: int = None
    ):
        """
        Generate a streaming chat completion response.

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            temperature: Optional temperature override (0.0 is respected)
            max_tokens: Optional max tokens override

        Yields:
            str: Generated response text chunks

        Raises:
            Exception: Propagates any Groq API error after logging it.
        """
        try:
            stream = await self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=self._effective_temperature(temperature),
                max_tokens=self._budget_max_tokens(messages, max_tokens),
                stream=True
            )

            # Forward only non-empty content deltas to the caller.
            async for chunk in stream:
                if chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content

        except Exception as e:
            logger.error(f"Error generating streaming chat completion: {str(e)}")
            raise
Background processing with configurable threads 21 | """ 22 | 23 | def __init__(self): 24 | """Initialize the PDF processor.""" 25 | self.executor = ThreadPoolExecutor(max_workers=settings.MAX_WORKERS) 26 | self.vector_store = VectorStore() 27 | self.semaphore = asyncio.Semaphore(settings.MAX_WORKERS) 28 | logger.info(f"Initialized PDFProcessor with {settings.MAX_WORKERS} workers") 29 | 30 | async def process_pdf(self, pdf_path: str) -> List[Dict[str, Any]]: 31 | """ 32 | Process a PDF file asynchronously. 33 | 34 | Args: 35 | pdf_path: Path to the PDF file 36 | 37 | Returns: 38 | List[Dict[str, Any]]: List of processed text chunks with metadata 39 | """ 40 | try: 41 | async with self.semaphore: # Limit concurrent processing 42 | # Extract text from PDF pages 43 | pages = await self._extract_text_from_pdf(pdf_path) 44 | 45 | # Process chunks in batches 46 | all_chunks = [] 47 | chunk_tasks = [] 48 | 49 | for page_num, page_text in enumerate(pages, 1): 50 | if not page_text.strip(): 51 | continue 52 | 53 | # Create chunk processing task 54 | task = asyncio.create_task(self._process_page( 55 | page_text=page_text, 56 | page_num=page_num, 57 | total_pages=len(pages), 58 | pdf_path=pdf_path 59 | )) 60 | chunk_tasks.append(task) 61 | 62 | # Wait for all chunk processing to complete 63 | chunk_results = await asyncio.gather(*chunk_tasks) 64 | for chunks in chunk_results: 65 | all_chunks.extend(chunks) 66 | 67 | logger.info(f"Successfully processed PDF: {pdf_path} into {len(all_chunks)} chunks") 68 | return all_chunks 69 | 70 | except Exception as e: 71 | logger.error(f"Error processing PDF {pdf_path}: {str(e)}") 72 | raise 73 | 74 | async def _process_page( 75 | self, 76 | page_text: str, 77 | page_num: int, 78 | total_pages: int, 79 | pdf_path: str 80 | ) -> List[Dict[str, Any]]: 81 | """Process a single page of text asynchronously.""" 82 | try: 83 | # Run chunking in thread pool to avoid blocking 84 | loop = asyncio.get_event_loop() 85 | chunks = await 
loop.run_in_executor( 86 | self.executor, 87 | chunk_text_recursive, 88 | page_text, 89 | settings.CHUNK_SIZE, 90 | settings.CHUNK_OVERLAP, 91 | { 92 | "file_path": pdf_path, 93 | "page_number": page_num, 94 | "total_pages": total_pages 95 | } 96 | ) 97 | return chunks 98 | 99 | except Exception as e: 100 | logger.error(f"Error processing page {page_num}: {str(e)}") 101 | raise 102 | 103 | async def _extract_text_from_pdf(self, pdf_path: str) -> List[str]: 104 | """ 105 | Extract text from each page of the PDF. 106 | 107 | Args: 108 | pdf_path: Path to the PDF file 109 | 110 | Returns: 111 | List[str]: List of text content from each page 112 | """ 113 | def _extract(): 114 | try: 115 | reader = PdfReader(pdf_path) 116 | pages = [] 117 | for page in reader.pages: 118 | text = page.extract_text() 119 | text = text.strip() 120 | text = '\n'.join(line.strip() for line in text.splitlines() if line.strip()) 121 | pages.append(text) 122 | 123 | logger.info(f"Extracted text from {len(pages)} pages in {pdf_path}") 124 | return pages 125 | 126 | except Exception as e: 127 | logger.error(f"Error extracting text from PDF {pdf_path}: {str(e)}") 128 | raise 129 | 130 | # Run extraction in thread pool 131 | return await asyncio.get_event_loop().run_in_executor( 132 | self.executor, _extract 133 | ) 134 | 135 | def get_text_statistics(self, chunks: List[Dict[str, Any]]) -> Dict[str, Any]: 136 | """ 137 | Get statistics about the processed text. 
def chunk_text_recursive(
    text: str,
    chunk_size: int = 512,
    chunk_overlap: int = 50,
    metadata: Dict[str, Any] = None
) -> List[Dict[str, Any]]:
    """
    Split text into chunks using recursive character text splitting.

    More context-aware than fixed-size splitting: it prefers paragraph
    boundaries, then line breaks, then word boundaries, then characters.

    Args:
        text: Text to split
        chunk_size: Maximum size of each chunk (characters)
        chunk_overlap: Number of characters to overlap between chunks
        metadata: Optional metadata to attach to each chunk

    Returns:
        List of dictionaries with "text", "chunk_index", "total_chunks"
        plus any supplied metadata keys

    Raises:
        Exception: Propagates splitter errors after logging them.
    """
    try:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            # BUG FIX: include " " and "" as fallback separators. With only
            # newline separators, a paragraph longer than chunk_size could
            # never be split further and chunks could exceed chunk_size.
            separators=["\n\n", "\n", " ", ""]
        )

        chunks = splitter.split_text(text)

        # Attach positional info (and caller metadata) to every chunk.
        chunk_docs = []
        for i, chunk in enumerate(chunks):
            doc = {
                "text": chunk,
                "chunk_index": i,
                "total_chunks": len(chunks)
            }
            if metadata:
                doc.update(metadata)
            chunk_docs.append(doc)

        logger.info(f"Split text into {len(chunks)} chunks")
        return chunk_docs

    except Exception as e:
        logger.error(f"Error chunking text: {str(e)}")
        raise
18 | """ 19 | 20 | def __init__(self): 21 | """Initialize the vector store with FastEmbed and Qdrant.""" 22 | 23 | # Initialize Reranker 24 | self.reranker = TextCrossEncoder( 25 | model_name="Xenova/ms-marco-MiniLM-L-12-v2" 26 | ) 27 | 28 | # Initialize FastEmbed 29 | self.embedding_model = TextEmbedding( 30 | model_name="nomic-ai/nomic-embed-text-v1.5", 31 | max_length=settings.EMBEDDING_LENGTH 32 | ) 33 | 34 | # Initialize Qdrant client 35 | self.qdrant = QdrantClient( 36 | url=settings.QDRANT_URL, 37 | api_key=settings.QDRANT_API_KEY 38 | ) 39 | 40 | # Ensure collection exists 41 | self._init_collection() 42 | logger.info("Initialized VectorStore") 43 | 44 | def _init_collection(self): 45 | """Initialize the vector collection if it doesn't exist.""" 46 | try: 47 | collections = self.qdrant.get_collections().collections 48 | if not any(c.name == settings.COLLECTION_NAME for c in collections): 49 | self.qdrant.create_collection( 50 | collection_name=settings.COLLECTION_NAME, 51 | vectors_config=VectorParams( 52 | size=settings.EMBEDDING_LENGTH, 53 | distance=Distance.COSINE 54 | ) 55 | ) 56 | logger.info(f"Created vector collection: {settings.COLLECTION_NAME}") 57 | except Exception as e: 58 | logger.error(f"Error initializing vector collection: {str(e)}") 59 | raise 60 | 61 | async def add_texts( 62 | self, 63 | texts: List[str], 64 | metadatas: List[Dict[str, Any]] = None 65 | ) -> List[str]: 66 | """ 67 | Add texts to the vector store. 
68 | 69 | Args: 70 | texts: List of texts to add 71 | metadatas: Optional list of metadata dicts 72 | 73 | Returns: 74 | List of IDs for the added texts 75 | """ 76 | if not self.qdrant.collection_exists(collection_name=settings.COLLECTION_NAME): 77 | self._init_collection() 78 | try: 79 | points = [] 80 | embeddings = list(self.embedding_model.embed(texts)) 81 | ids = [str(uuid.uuid4()) for _ in texts] 82 | for i, (text, embedding) in enumerate(zip(texts, embeddings)): 83 | point = PointStruct( 84 | id=ids[i], 85 | vector=embedding.tolist(), 86 | payload={ 87 | "text": text, 88 | **(metadatas[i] if metadatas else {}) 89 | } 90 | ) 91 | points.append(point) 92 | 93 | # Upload to Qdrant 94 | self.qdrant.upsert( 95 | collection_name=settings.COLLECTION_NAME, 96 | points=points 97 | ) 98 | 99 | logger.info(f"Added {len(texts)} texts to vector store") 100 | return ids 101 | 102 | except Exception as e: 103 | logger.error(f"Error adding texts to vector store: {str(e)}") 104 | raise 105 | 106 | async def similarity_search( 107 | self, 108 | query: str, 109 | k: int = 4, 110 | filter: Dict[str, Any] = None 111 | ) -> List[Dict[str, Any]]: 112 | """ 113 | Search for similar texts in the vector store. 
114 | 115 | Args: 116 | query: Query text 117 | k: Number of results to return 118 | filter: Optional filter for the search 119 | 120 | Returns: 121 | List of similar documents with scores 122 | """ 123 | try: 124 | # Generate query embedding 125 | query_embedding = list(self.embedding_model.embed([query]))[0] 126 | 127 | # Search in Qdrant 128 | results = self.qdrant.search( 129 | collection_name=settings.COLLECTION_NAME, 130 | query_vector=query_embedding.tolist(), 131 | limit=k, 132 | query_filter=filter 133 | ) 134 | 135 | # Format results 136 | docs = [] 137 | for res in results: 138 | doc = { 139 | "id": res.id, 140 | "score": res.score, 141 | **res.payload 142 | } 143 | docs.append(doc) 144 | 145 | # Re Ranking Document 146 | 147 | return docs 148 | 149 | except Exception as e: 150 | logger.error(f"Error searching vector store: {str(e)}") 151 | raise -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # Exit on error 3 | 4 | # Check if .env file exists 5 | if [ ! -f ".env" ]; then 6 | echo "⚠️ .env file not found! Creating from example..." 7 | if [ -f ".env.example" ]; then 8 | cp .env.example .env 9 | echo "✅ Created .env from .env.example. Please update the values as needed." 10 | else 11 | echo "❌ .env.example not found! Please create a .env file manually." 
12 | exit 1 13 | fi 14 | fi 15 | 16 | # Define colors for output 17 | GREEN='\033[0;32m' 18 | YELLOW='\033[1;33m' 19 | BLUE='\033[0;34m' 20 | NC='\033[0m' # No Color 21 | 22 | # Print header 23 | echo -e "${BLUE}╔════════════════════════════════════════════════╗${NC}" 24 | echo -e "${BLUE}║ ${GREEN}RAG System Launcher${BLUE} ║${NC}" 25 | echo -e "${BLUE}╚════════════════════════════════════════════════╝${NC}" 26 | 27 | # Show options 28 | echo -e "${YELLOW}Choose a launch option:${NC}" 29 | echo -e " ${GREEN}1)${NC} Start full system with Docker Compose" 30 | echo -e " ${GREEN}2)${NC} Start FastAPI backend only" 31 | echo -e " ${GREEN}3)${NC} Start Streamlit frontend only" 32 | echo -e " ${GREEN}4)${NC} Quit" 33 | 34 | # Get user input 35 | read -p "Enter your choice [1-4]: " choice 36 | 37 | case $choice in 38 | 1) 39 | echo -e "${YELLOW}Starting both backend and frontend with Docker Compose...${NC}" 40 | docker-compose up --build 41 | ;; 42 | 2) 43 | echo -e "${YELLOW}Starting FastAPI backend service...${NC}" 44 | docker-compose up --build app 45 | ;; 46 | 3) 47 | echo -e "${YELLOW}Starting Streamlit frontend service...${NC}" 48 | 49 | # Check if Docker is preferred 50 | read -p "Use Docker for Streamlit? (y/n): " use_docker 51 | 52 | if [[ $use_docker == "y" || $use_docker == "Y" ]]; then 53 | echo -e "${YELLOW}Starting Streamlit in Docker...${NC}" 54 | docker-compose up --build streamlit 55 | else 56 | echo -e "${YELLOW}Starting Streamlit directly...${NC}" 57 | 58 | # Check for Python virtual environment 59 | if [ -d "venv" ]; then 60 | source venv/bin/activate 61 | elif [ -d ".venv" ]; then 62 | source .venv/bin/activate 63 | else 64 | echo -e "${YELLOW}No Python virtual environment found. Using system Python.${NC}" 65 | fi 66 | 67 | # Check for installed dependencies 68 | if ! pip show streamlit &> /dev/null; then 69 | echo -e "${YELLOW}Streamlit not found. Installing dependencies...${NC}" 70 | pip install -e . 
71 | fi 72 | 73 | # Start Streamlit 74 | cd streamlit && ./run.sh 75 | fi 76 | ;; 77 | 4) 78 | echo -e "${YELLOW}Exiting...${NC}" 79 | exit 0 80 | ;; 81 | *) 82 | echo -e "${YELLOW}Invalid choice. Exiting.${NC}" 83 | exit 1 84 | ;; 85 | esac -------------------------------------------------------------------------------- /streamlit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim 2 | 3 | WORKDIR /app 4 | 5 | # Set environment variables 6 | ENV PYTHONUNBUFFERED=1 \ 7 | PYTHONDONTWRITEBYTECODE=1 \ 8 | UV_SYSTEM_PYTHON=1 \ 9 | PATH="/root/.local/bin:$PATH" \ 10 | STREAMLIT_SERVER_PORT=8501 \ 11 | STREAMLIT_SERVER_HEADLESS=true \ 12 | STREAMLIT_BROWSER_GATHER_USAGE_STATS=false 13 | 14 | # Install system dependencies 15 | RUN apt-get update && apt-get install -y --no-install-recommends \ 16 | build-essential \ 17 | curl \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | # Install uv 21 | RUN curl -LsSf https://astral.sh/uv/install.sh | sh 22 | 23 | # Copy project files for dependency installation 24 | COPY pyproject.toml . 25 | COPY .env.example . 26 | 27 | # Create required directories 28 | RUN mkdir -p uploads logs && chmod 777 uploads logs 29 | 30 | # Create venv and install dependencies 31 | RUN uv venv && \ 32 | . 
.venv/bin/activate && \ 33 | uv sync 34 | 35 | # Copy Streamlit application files 36 | COPY streamlit/ ./streamlit/ 37 | 38 | # Create a copy of the app.py directly in the root for easy access 39 | RUN cp ./streamlit/app.py ./streamlit/docker-app.py /app/ || true 40 | 41 | # Set working directory to the streamlit directory 42 | WORKDIR /app/streamlit 43 | 44 | # Make sure the run script is directly available and executable 45 | COPY streamlit/run.sh /app/streamlit/run.sh 46 | RUN chmod +x /app/streamlit/run.sh 47 | 48 | # Also copy it to the root as a fallback 49 | COPY streamlit/run.sh /run.sh 50 | RUN chmod +x /run.sh 51 | 52 | # Expose Streamlit port 53 | EXPOSE 8501 54 | 55 | # Health check 56 | HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ 57 | CMD curl --fail http://localhost:8501/_stcore/health || exit 1 58 | 59 | # Start Streamlit app 60 | ENTRYPOINT ["/run.sh"] -------------------------------------------------------------------------------- /streamlit/app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import httpx 3 | import os 4 | from typing import Dict, List, Any, Optional 5 | from datetime import datetime 6 | 7 | # Configure the app 8 | st.set_page_config( 9 | page_title="RAG Chat System", 10 | page_icon="📚", 11 | layout="wide", 12 | initial_sidebar_state="expanded" 13 | ) 14 | 15 | # Constants 16 | API_URL = os.getenv("API_URL", "http://localhost:8000") 17 | API_PREFIX = "/api/v1" 18 | 19 | # State management 20 | if "messages" not in st.session_state: 21 | st.session_state.messages = [] 22 | 23 | if "conversation_id" not in st.session_state: 24 | st.session_state.conversation_id = None 25 | 26 | 27 | def get_api_url(endpoint: str) -> str: 28 | """Get the full API URL for an endpoint.""" 29 | return f"{API_URL}{API_PREFIX}{endpoint}" 30 | 31 | 32 | def load_conversations() -> List[Dict[str, Any]]: 33 | """Load all conversations from the API.""" 34 | try: 35 
def create_conversation() -> Optional[str]:
    """Create a new conversation via the API and return its ID (None on failure)."""
    try:
        # Explicit timeout for consistency with submit_feedback's 10s budget.
        response = httpx.put(get_api_url("/chat/conversation"), timeout=10.0)
        if response.status_code == 200:
            return response.json()["id"]
        st.error(f"Failed to create conversation: {response.text}")
        return None
    except Exception as e:
        st.error(f"Error creating conversation: {str(e)}")
        return None


def get_conversation(conversation_id: str) -> Dict[str, Any]:
    """Fetch a conversation by ID; returns {} on any failure (best-effort)."""
    try:
        response = httpx.get(
            get_api_url(f"/chat/conversations/{conversation_id}"), timeout=10.0
        )
        if response.status_code == 200:
            return response.json()
        st.error(f"Failed to get conversation: {response.text}")
        return {}
    except Exception:
        # Deliberately silent: the UI falls back to an empty conversation.
        return {}


def upload_document(file) -> bool:
    """Upload a PDF to the API; returns True on success."""
    try:
        files = {"file": (file.name, file.getvalue(), "application/pdf")}
        # Uploads of large PDFs can be slow — allow a generous timeout.
        response = httpx.post(get_api_url("/documents/upload"), files=files, timeout=60.0)
        if response.status_code == 200:
            return True
        st.error(f"Failed to upload document: {response.text}")
        return False
    except Exception as e:
        st.error(f"Error uploading document: {str(e)}")
        return False


def get_task_status(task_id: str) -> Dict[str, Any]:
    """Return a processing task's status dict; {"status": "failed"} on any error."""
    try:
        response = httpx.get(get_api_url(f"/documents/task/{task_id}"), timeout=10.0)
        if response.status_code == 200:
            return response.json()
        return {"status": "failed"}
    except Exception:
        return {"status": "failed"}
def format_message(msg: Dict[str, Any], message_index: int) -> None:
    """
    Render one chat message, including the feedback widget for assistant
    messages.

    Args:
        msg: Message dict with "role", "content" and optional "feedback"
        message_index: Index of the message within the conversation, used to
            build unique widget keys and the feedback endpoint path
    """
    role = msg.get("role", "")
    content = msg.get("content", "")
    feedback = msg.get("feedback", {})

    if role == "user":
        st.chat_message("user").write(content)
    elif role == "assistant":
        with st.chat_message("assistant"):
            st.write(content)

            # Only assistant messages collect feedback.
            if not feedback.get("submitted_at"):
                # st.feedback("thumbs") returns 0 for thumbs-down, 1 for
                # thumbs-up, or None while nothing is selected.
                selected = st.feedback(
                    "thumbs",
                    key=f"feedback_{message_index}"
                )

                if selected is not None:
                    thumbs = "down" if selected == 0 else "up"

                    if selected == 0:
                        # Thumbs-down: ask for a comment before submitting.
                        comment = st.text_input(
                            "What was wrong with this response?",
                            key=f"feedback_comment_{message_index}"
                        )
                        if st.button("Submit Feedback", key=f"submit_feedback_{message_index}"):
                            if submit_feedback(st.session_state.conversation_id, message_index, thumbs, comment):
                                # BUG FIX: was st.warning, which styled a
                                # successful submission as a warning.
                                st.success("Thank you for your feedback!")
                    else:
                        # Thumbs-up: submit immediately, no comment needed.
                        if submit_feedback(st.session_state.conversation_id, message_index, thumbs):
                            st.success("Thank you for your feedback!")
            else:
                # Feedback already submitted: show it read-only.
                # .get avoids a KeyError on malformed feedback records.
                feedback_icon = "👍" if feedback.get("thumbs") == "up" else "👎"
                st.caption(f"Feedback: {feedback_icon}")
                if feedback.get("comment"):
                    st.caption(f"Comment: {feedback['comment']}")

    elif role == "system":
        st.chat_message("system").write(content)
Conversations") 207 | 208 | if st.sidebar.button("New Conversation"): 209 | with st.spinner("Creating new conversation..."): 210 | # Create a new conversation 211 | conversation_id = create_conversation() 212 | if conversation_id: 213 | st.session_state.conversation_id = conversation_id 214 | st.session_state.messages = [] 215 | st.sidebar.success("New conversation created!") 216 | st.rerun() 217 | else: 218 | st.sidebar.error("Failed to create new conversation.") 219 | 220 | # List existing conversations 221 | conversations = load_conversations() 222 | if conversations: 223 | st.sidebar.subheader("Select Conversation") 224 | for conv in conversations: 225 | conv_id = conv.get("id", "") 226 | title = conv.get("title", "Untitled") 227 | created_at = conv.get("created_at", "") 228 | 229 | # Format the date if it exists 230 | if created_at: 231 | try: 232 | # Parse ISO format or timestamp 233 | if isinstance(created_at, str): 234 | created_date = datetime.fromisoformat(created_at.replace("Z", "+00:00")) 235 | else: 236 | created_date = datetime.fromtimestamp(created_at) 237 | 238 | date_str = created_date.strftime("%Y-%m-%d %H:%M") 239 | except Exception as _: 240 | date_str = "Unknown date" 241 | else: 242 | date_str = "Unknown date" 243 | 244 | # Create a button for each conversation 245 | if st.sidebar.button(f"{title} ({date_str})", key=f"conv_{conv_id}"): 246 | st.session_state.conversation_id = conv_id 247 | load_conversation_history(conv_id) 248 | st.rerun() 249 | 250 | # About section 251 | st.sidebar.header("ℹ️ About") 252 | st.sidebar.info( 253 | """ 254 | This is a RAG (Retrieval-Augmented Generation) chat system. 255 | Upload documents and ask questions about them. 256 | 257 | The system will retrieve relevant information from your documents 258 | to provide accurate and contextual responses. 
259 | """ 260 | ) 261 | 262 | 263 | def main_content(): 264 | """Render the main chat interface.""" 265 | st.title("RAG Chat System") 266 | 267 | # Check if we have an active conversation 268 | if st.session_state.conversation_id is None: 269 | st.info("👈 Create a new conversation or select an existing one from the sidebar.") 270 | return 271 | 272 | # Display conversation title 273 | conversation = get_conversation(st.session_state.conversation_id) 274 | if conversation: 275 | st.subheader(f"Conversation: {conversation.get('title', 'Untitled')}") 276 | 277 | # Display chat messages 278 | for message_index, message in enumerate(st.session_state.messages): 279 | format_message(message, message_index) 280 | 281 | # Chat input 282 | if prompt := st.chat_input("Ask a question about your documents..."): 283 | # Add user message to UI 284 | st.chat_message("user").write(prompt) 285 | 286 | # Add to session state 287 | user_message = { 288 | "role": "user", 289 | "content": prompt 290 | } 291 | st.session_state.messages.append(user_message) 292 | 293 | # Send message and get response 294 | with st.spinner("Thinking..."): 295 | response = send_message(st.session_state.conversation_id, prompt) 296 | 297 | if response: 298 | # Add assistant message to session state and display it 299 | assistant_message = { 300 | "role": response["role"], 301 | "content": response["content"] 302 | } 303 | st.session_state.messages.append(assistant_message) 304 | format_message(assistant_message, len(st.session_state.messages) - 1) 305 | else: 306 | st.error("Failed to get response. 
Please try again.") 307 | 308 | 309 | # Main app layout 310 | def main(): 311 | sidebar() 312 | main_content() 313 | 314 | 315 | if __name__ == "__main__": 316 | main() -------------------------------------------------------------------------------- /streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | primaryColor = "#4682B4" # Steel Blue 3 | backgroundColor = "#F0F2F6" 4 | secondaryBackgroundColor = "#E0E4E8" 5 | textColor = "#262730" 6 | font = "sans serif" 7 | 8 | [server] 9 | port = 8501 10 | maxUploadSize = 200 11 | enableCORS = true 12 | enableXsrfProtection = true 13 | headless = true 14 | 15 | [browser] 16 | gatherUsageStats = false 17 | 18 | [runner] 19 | magicEnabled = true 20 | installTracer = false 21 | fixMatplotlib = true 22 | 23 | [logger] 24 | level = "info" -------------------------------------------------------------------------------- /streamlit/run-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # Exit on error 3 | 4 | # Define colors for output 5 | GREEN='\033[0;32m' 6 | YELLOW='\033[1;33m' 7 | BLUE='\033[0;34m' 8 | NC='\033[0m' # No Color 9 | 10 | # Print header 11 | echo -e "${BLUE}╔════════════════════════════════════════════════╗${NC}" 12 | echo -e "${BLUE}║ ${GREEN}Streamlit Docker Runner${BLUE} ║${NC}" 13 | echo -e "${BLUE}╚════════════════════════════════════════════════╝${NC}" 14 | 15 | # Check if Docker is installed 16 | if ! command -v docker &> /dev/null; then 17 | echo -e "${YELLOW}Docker is not installed. Please install Docker to continue.${NC}" 18 | exit 1 19 | fi 20 | 21 | # Check if docker-compose.yml exists 22 | if [ ! -f "docker-compose.yml" ]; then 23 | echo -e "${YELLOW}docker-compose.yml not found. 
Make sure you're in the streamlit directory.${NC}" 24 | exit 1 25 | fi 26 | 27 | echo -e "${YELLOW}Building and starting Streamlit container...${NC}" 28 | echo -e "${YELLOW}This will connect to a FastAPI backend running on your host machine.${NC}" 29 | echo -e "${YELLOW}Make sure the FastAPI backend is running on port 8000.${NC}" 30 | 31 | # Start with docker-compose 32 | docker-compose build 33 | docker-compose up 34 | 35 | # Script never reaches here if docker-compose up is running in foreground -------------------------------------------------------------------------------- /streamlit/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # Exit on error 3 | 4 | # Activate virtual environment if it exists 5 | if [ -d "/app/.venv" ]; then 6 | echo "Activating virtual environment..." 7 | . /app/.venv/bin/activate 8 | fi 9 | 10 | # Load environment variables from parent .env file if exists 11 | if [ -f "../.env" ]; then 12 | echo "Loading environment variables from ../.env" 13 | export $(grep -v '^#' ../.env | xargs) 14 | fi 15 | 16 | # Set default values 17 | export API_URL=${API_URL:-"http://localhost:8000"} 18 | export WS_URL=${WS_URL:-"ws://localhost:8000"} 19 | export STREAMLIT_SERVER_PORT=${STREAMLIT_SERVER_PORT:-8501} 20 | export STREAMLIT_SERVER_HEADLESS=${STREAMLIT_SERVER_HEADLESS:-true} 21 | export STREAMLIT_BROWSER_GATHER_USAGE_STATS=${STREAMLIT_BROWSER_GATHER_USAGE_STATS:-false} 22 | 23 | # Print environment settings 24 | echo "Starting Streamlit app with:" 25 | echo " - API_URL: $API_URL" 26 | echo " - WS_URL: $WS_URL" 27 | echo " - STREAMLIT_SERVER_PORT: $STREAMLIT_SERVER_PORT" 28 | echo " - Python executable: $(which python)" 29 | 30 | # Check app files and run the appropriate one 31 | if [ -f "app.py" ]; then 32 | echo "Using app.py for Streamlit" 33 | exec streamlit run app.py 34 | else 35 | echo "ERROR: No Streamlit app found in $(pwd)!" 
36 | echo "Directory contents:" 37 | ls -la 38 | exit 1 39 | fi -------------------------------------------------------------------------------- /streamlit/utils.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import os 3 | import time 4 | import streamlit as st 5 | from typing import Dict, List, Any, Optional, Tuple 6 | from datetime import datetime 7 | 8 | # Constants 9 | API_URL = os.getenv("API_URL", "http://localhost:8000") 10 | API_PREFIX = "/api/v1" 11 | 12 | def get_api_url(endpoint: str) -> str: 13 | """Get the full API URL for an endpoint.""" 14 | return f"{API_URL}{API_PREFIX}{endpoint}" 15 | 16 | # Document Management 17 | def upload_document(file) -> Tuple[bool, Optional[str]]: 18 | """ 19 | Upload a document to the API. 20 | 21 | Args: 22 | file: The uploaded file object from Streamlit 23 | 24 | Returns: 25 | Tuple of (success, task_id or None) 26 | """ 27 | try: 28 | files = {"file": (file.name, file.getvalue(), "application/pdf")} 29 | response = httpx.post(get_api_url("/documents/upload"), files=files) 30 | 31 | if response.status_code == 200: 32 | return True, response.json().get("task_id") 33 | else: 34 | st.error(f"Failed to upload document: {response.text}") 35 | return False, None 36 | except Exception as e: 37 | st.error(f"Error uploading document: {str(e)}") 38 | return False, None 39 | 40 | def poll_task_status(task_id: str, max_attempts: int = 60, interval: float = 2.0) -> Dict[str, Any]: 41 | """ 42 | Poll the task status until it completes or fails. 
43 | 44 | Args: 45 | task_id: The ID of the task to poll 46 | max_attempts: Maximum number of polling attempts 47 | interval: Time interval between polls in seconds 48 | 49 | Returns: 50 | Task status information 51 | """ 52 | for attempt in range(max_attempts): 53 | try: 54 | response = httpx.get(get_api_url(f"/documents/task/{task_id}")) 55 | if response.status_code == 200: 56 | task_data = response.json() 57 | status = task_data.get("status", "") 58 | 59 | if status in ["completed", "failed"]: 60 | return task_data 61 | 62 | # Add a small delay before the next poll 63 | time.sleep(interval) 64 | else: 65 | return {"status": "failed", "error": f"Failed to get task status: {response.text}"} 66 | except Exception as e: 67 | return {"status": "failed", "error": f"Error polling task: {str(e)}"} 68 | 69 | return {"status": "timeout", "error": "Task polling timed out"} 70 | 71 | # Conversation Management 72 | def create_conversation() -> Optional[str]: 73 | """Create a new conversation and return its ID.""" 74 | try: 75 | response = httpx.put(get_api_url("/chat/conversation")) 76 | if response.status_code == 200: 77 | return response.json()["id"] 78 | else: 79 | st.error(f"Failed to create conversation: {response.text}") 80 | return None 81 | except Exception as e: 82 | st.error(f"Error creating conversation: {str(e)}") 83 | return None 84 | 85 | def get_conversations(skip: int = 0, limit: int = 20) -> List[Dict[str, Any]]: 86 | """Get a list of conversations with pagination.""" 87 | try: 88 | response = httpx.get(get_api_url(f"/chat/conversations?skip={skip}&limit={limit}")) 89 | if response.status_code == 200: 90 | return response.json() 91 | else: 92 | st.error(f"Failed to get conversations: {response.text}") 93 | return [] 94 | except Exception as e: 95 | st.error(f"Error getting conversations: {str(e)}") 96 | return [] 97 | 98 | def get_conversation(conversation_id: str) -> Optional[Dict[str, Any]]: 99 | """Get a specific conversation by ID.""" 100 | try: 101 | 
response = httpx.get(get_api_url(f"/chat/conversations/{conversation_id}")) 102 | if response.status_code == 200: 103 | return response.json() 104 | else: 105 | st.error(f"Failed to get conversation: {response.text}") 106 | return None 107 | except Exception as e: 108 | st.error(f"Error getting conversation: {str(e)}") 109 | return None 110 | 111 | def delete_conversation(conversation_id: str) -> bool: 112 | """Delete a conversation by ID.""" 113 | try: 114 | response = httpx.delete(get_api_url(f"/chat/conversations/{conversation_id}")) 115 | if response.status_code == 200: 116 | return True 117 | else: 118 | st.error(f"Failed to delete conversation: {response.text}") 119 | return False 120 | except Exception as e: 121 | st.error(f"Error deleting conversation: {str(e)}") 122 | return False 123 | 124 | # Utility Functions 125 | def format_timestamp(timestamp_str: str) -> str: 126 | """Format an ISO timestamp to a human-readable format.""" 127 | try: 128 | if not timestamp_str: 129 | return "Unknown" 130 | 131 | # Handle both string ISO format and numeric timestamp 132 | if isinstance(timestamp_str, str): 133 | dt = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00")) 134 | else: 135 | dt = datetime.fromtimestamp(timestamp_str) 136 | 137 | return dt.strftime("%Y-%m-%d %H:%M") 138 | except Exception: 139 | return "Invalid date" 140 | 141 | def format_file_size(size_bytes: int) -> str: 142 | """Format file size in bytes to human-readable format.""" 143 | if size_bytes < 1024: 144 | return f"{size_bytes} bytes" 145 | elif size_bytes < 1024 * 1024: 146 | return f"{size_bytes / 1024:.1f} KB" 147 | elif size_bytes < 1024 * 1024 * 1024: 148 | return f"{size_bytes / (1024 * 1024):.1f} MB" 149 | else: 150 | return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB" --------------------------------------------------------------------------------