├── .env.example
├── .gitignore
├── .python-version
├── Dockerfile
├── LICENSE
├── README.md
├── assets
│   └── cygen logo.png
├── docker-compose.yml
├── docker
│   └── entrypoint.sh
├── pyproject.toml
├── src
│   ├── main.py
│   ├── router
│   │   ├── __init__.py
│   │   ├── chat.py
│   │   └── upload.py
│   ├── settings.py
│   └── utils
│       ├── __init__.py
│       ├── background_tasks.py
│       ├── llm.py
│       ├── pdf_processor.py
│       ├── text_chunking.py
│       └── vector_store.py
├── start.sh
├── streamlit
│   ├── Dockerfile
│   ├── app.py
│   ├── config.toml
│   ├── run-docker.sh
│   ├── run.sh
│   └── utils.py
└── uv.lock
/.env.example:
--------------------------------------------------------------------------------
1 | # Server Settings
2 | DEBUG=false
3 | API_V1_PREFIX=/api/v1
4 |
5 | # Processing Settings
6 | MAX_WORKERS=4
7 | CHUNK_SIZE=512
8 | CHUNK_OVERLAP=50
9 |
10 | # Database Settings
11 | MONGODB_URL=mongodb://mongodb:27017
12 | MONGODB_DB_NAME=rag_system
13 |
14 | # Vector Store Settings
15 | QDRANT_URL=http://qdrant:6333
16 | QDRANT_API_KEY=your_qdrant_api_key
17 | COLLECTION_NAME=documents
18 |
19 | # LLM Settings
20 | GROQ_API_KEY=your_groq_api_key
21 | MODEL_NAME=mixtral-8x7b-32768
22 | TEMPERATURE=0.7
23 |
24 | # PDF Processing
25 | OCR_ENABLED=true
26 | PDF_UPLOAD_DIR=uploads
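27 |
28 | # Retrieval Settings (defaults taken from the README configuration table)
29 | TOP_K=5
30 | RAG_THRESHOLD=0.75
31 | N_LAST_MESSAGE=5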
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # Ruff stuff:
171 | .ruff_cache/
172 |
173 | # PyPI configuration file
174 | .pypirc
175 |
176 | # Additional files
177 | cybersec-report/
178 | .cursorrules
179 | *.DS_Store
180 | uploads/
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.11
2 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use Python 3.11 slim image
2 | FROM python:3.11-slim
3 |
4 | # Set working directory
5 | WORKDIR /app
6 |
7 | # Set environment variables
8 | ENV PYTHONUNBUFFERED=1 \
9 | PYTHONDONTWRITEBYTECODE=1 \
10 | UV_SYSTEM_PYTHON=1 \
11 | PATH="/root/.local/bin:$PATH"
12 |
13 | # Install system dependencies
14 | RUN apt-get update && apt-get install -y --no-install-recommends \
15 | build-essential \
16 | libpq-dev \
17 | curl \
18 | && rm -rf /var/lib/apt/lists/*
19 |
20 | # Install uv
21 | RUN curl -LsSf https://astral.sh/uv/install.sh | sh
22 |
23 | # Copy project files
24 | COPY pyproject.toml .
25 | COPY . .
26 |
27 | # Create required directories
28 | RUN mkdir -p uploads logs && chmod 777 uploads logs
29 |
30 | # Create venv and install dependencies
31 | RUN uv venv && \
32 | . .venv/bin/activate && \
33 | uv pip install -e .
34 |
35 | # Development mode: Use entrypoint script
36 | COPY docker/entrypoint.sh /entrypoint.sh
37 | RUN chmod +x /entrypoint.sh
38 |
39 | ENTRYPOINT ["/entrypoint.sh"]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # CyGen: Self-Hosted LLM for Cybersecurity Analysis 🛡️
4 |
5 | 
6 | 
7 | 
8 | 
9 | 
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | CyGen is a powerful Retrieval-Augmented Generation (RAG) system built with FastAPI, MongoDB, Qdrant, and Groq LLM, featuring a Streamlit frontend for seamless interaction. This system allows you to upload PDF documents, process them intelligently, and have natural language conversations about their content.
18 |
19 | ## ✨ Features
20 |
21 | - **📄 Advanced PDF Document Ingestion**
22 |   - Multi-threaded PDF processing
23 |   - Intelligent text chunking with configurable parameters
24 |   - Background task queue for non-blocking operations
25 |   - Progress tracking for document processing
26 |
27 | - **🔍 Smart Vector Search**
28 |   - Semantic similarity search using embeddings
29 |   - Context-aware document retrieval
30 |   - Configurable relevance thresholds
31 |   - Metadata-enhanced document chunks
32 |
33 | - **💬 Interactive Chat Interface**
34 |   - Real-time chat via an HTTP POST endpoint
35 |   - Context window management
36 |   - Conversation history persisted in MongoDB
37 |   - Automatic conversation title generation
38 |
39 | - **🧠 Groq LLM Integration**
40 |   - Fast inference with an 8k context window
41 |   - Optimized prompting strategy
42 |   - Balanced context retrieval
43 |   - Temperature control for response diversity
44 |
45 | - **🖥️ User-friendly Web UI**
46 |   - Document upload with progress indicators
47 |   - Conversation management
48 |   - Responsive design
49 |   - Real-time chat updates
50 |
51 | ## 🏗️ System Architecture
52 |
53 |
54 |
55 | ```mermaid
56 | flowchart TD
57 | subgraph Client
58 | UI[Streamlit Frontend]
59 | end
60 |
61 | subgraph Backend
62 | API[FastAPI Backend]
63 | TaskQueue[Background Task Queue]
64 | VectorDB[(Qdrant Vector DB)]
65 | MongoDB[(MongoDB)]
66 | LLM[Groq LLM API]
67 | end
68 |
69 | subgraph Processing
70 | PDF[PDF Processor]
71 | Chunker[Text Chunker]
72 | Embedder[Embedding Model]
73 | end
74 |
75 | %% Client to Backend interactions
76 | UI -->|1. Upload PDF| API
77 | UI -->|5. Send Query| API
78 | API -->|8. Stream Response| UI
79 |
80 | %% Document Processing Flow
81 | API -->|2. Process Document| TaskQueue
82 | TaskQueue -->|3. Extract & Chunk| PDF
83 | PDF -->|3.1. Split Text| Chunker
84 | Chunker -->|3.2. Generate Embeddings| Embedder
85 | Embedder -->|3.3. Store Vectors| VectorDB
86 | Embedder -->|3.4. Store Metadata| MongoDB
87 |
88 | %% Query Processing Flow
89 | API -->|6. Retrieve Context| VectorDB
90 | API -->|6.1. Get History| MongoDB
91 | API -->|7. Generate Response| LLM
92 | VectorDB -->|6.2. Relevant Chunks| API
93 | MongoDB -->|6.3. Conversation History| API
94 |
95 | %% Styles
96 | classDef primary fill:#4527A0,stroke:#4527A0,color:white,stroke-width:2px
97 | classDef secondary fill:#7E57C2,stroke:#7E57C2,color:white
98 | classDef database fill:#1A237E,stroke:#1A237E,color:white
99 | classDef processor fill:#FF7043,stroke:#FF7043,color:white
100 | classDef client fill:#00ACC1,stroke:#00ACC1,color:white
101 |
102 | class API,TaskQueue primary
103 | class PDF,Chunker,Embedder processor
104 | class VectorDB,MongoDB database
105 | class LLM secondary
106 | class UI client
107 | ```
108 |
109 |
110 |
111 | The system comprises several key components that work together:
112 |
113 | - **FastAPI Backend**
114 |   - RESTful API endpoints and background task processing
115 |   - Asynchronous request handling for high concurrency
116 |   - Dependency injection for clean service management
117 |   - Error handling and logging
118 |
119 | - **MongoDB**
120 |   - Conversation history storage
121 |   - Document metadata and status tracking
122 |   - Asynchronous operations with the Motor client
123 |   - Indexed collections for fast retrieval
124 |
125 | - **Qdrant Vector Database**
126 |   - High-performance vector storage and retrieval
127 |   - Scalable embedding storage
128 |   - Similarity search with metadata filtering
129 |   - Optimized for semantic retrieval
130 |
131 | - **Groq LLM Integration**
132 |   - Ultra-fast inference for responsive conversation
133 |   - 8k token context window
134 |   - Adaptive system prompts based on query context
135 |   - Clean API integration with error handling
136 |
137 | - **Streamlit Frontend**
138 |   - Intuitive user interface for document uploads
139 |   - Conversation management and history
140 |   - Real-time chat interaction
141 |   - Mobile-responsive design
142 |
143 | ## ⚙️ Technical Details
144 |
145 | ### PDF Processing Pipeline
146 |
147 | Our PDF processing pipeline is designed for efficiency and accuracy:
148 |
149 | 1. **Text Extraction**: Extract raw text from PDF documents using PyPDF2
150 | 2. **Text Cleaning**: Remove artifacts and normalize text
151 | 3. **Chunking Strategy**: Implement recursive chunking with smart boundary detection
152 | 4. **Metadata Enrichment**: Add page numbers, file paths, and other metadata
153 | 5. **Vector Embedding**: Generate embeddings for each chunk
154 | 6. **Storage**: Store vectors in Qdrant and metadata in MongoDB
155 |
156 | ### RAG Implementation
157 |
158 | The RAG system follows a sophisticated approach to content retrieval:
159 |
160 | 1. **Query Analysis**: Analyze user query for intent and keywords
161 | 2. **Context Retrieval**: Retrieve relevant document chunks from vector store
162 | 3. **Threshold Filtering**: Filter results based on similarity score threshold
163 | 4. **Context Assembly**: Combine retrieved chunks with conversation history
164 | 5. **Prompt Construction**: Build prompt with system instructions and context
165 | 6. **LLM Generation**: Generate response using Groq LLM
166 | 7. **Response Delivery**: Deliver response to user in real-time
167 |
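Steps 2 through 6 map directly onto `src/router/chat.py`, reproduced later in this dump. The sketch below condenses that flow with the system prompts abbreviated; the chunk fields (`score`, `text`, `file_path`, `page_number`) and the `similarity_search` / `chat_completion` calls match the router code:

```python
async def answer(query: str, history: list[dict], settings, vector_store, llm) -> str:
    """Condensed from src/router/chat.py; a sketch, not a drop-in replacement."""
    # Step 2: retrieve candidate chunks from the vector store.
    context = await vector_store.similarity_search(query=query, k=settings.TOP_K)

    # Step 3: fall back to plain conversation when nothing clears the threshold.
    is_basic = not context or all(c["score"] < settings.RAG_THRESHOLD for c in context)

    # Step 4: include only the last N turns of conversation history.
    turns = "\n".join(f"{m['role']}: {m['content']}" for m in history[-settings.N_LAST_MESSAGE:])

    # Step 5: build the prompt (system instructions abbreviated here).
    if is_basic:
        system, user = "You are a helpful and friendly AI assistant.", query
    else:
        knowledge = "\n".join(
            f"{c['text']}\nSource: {c['file_path']} - Page: {c['page_number']}"
            for c in context
        )
        system = "Use the provided context and cite sources with page numbers."
        user = f"Context: {knowledge}\nQuestion: {query}"

    # Step 6: generate the response via the Groq wrapper.
    return await llm.chat_completion(
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": f"Conversation History:\n{turns}"},
            {"role": "user", "content": user},
        ],
        temperature=settings.TEMPERATURE,
    )
```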
168 | ## 🚀 Getting Started
169 |
170 | ### Prerequisites
171 |
172 | - Docker and Docker Compose
173 | - Python 3.11+
174 | - uv package manager (recommended for local development)
175 | - Groq API key
176 | - MongoDB instance (local or Atlas)
177 | - Qdrant instance (local or cloud)
178 |
179 | ### Environment Setup
180 |
181 | 1. Clone the repository:
182 | ```bash
183 | git clone https://github.com/yourusername/cygen.git
184 | cd cygen
185 | ```
186 |
187 | 2. Copy the example environment file:
188 | ```bash
189 | cp .env.example .env
190 | ```
191 |
192 | 3. Update the following variables in `.env`:
193 | ```
194 | GROQ_API_KEY=your_groq_api_key
195 | MONGODB_URL=mongodb://username:password@host:port/db_name
196 | QDRANT_URL=http://qdrant_host:port
197 | MAX_WORKERS=4
198 | CHUNK_SIZE=512
199 | CHUNK_OVERLAP=50
200 | TOP_K=5
201 | RAG_THRESHOLD=0.75
202 | TEMPERATURE=0.7
203 | N_LAST_MESSAGE=5
204 | ```
205 |
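These variables are loaded once at startup by `get_settings()` in `src/settings.py`, which is not reproduced in this section. Below is a minimal sketch of what such a module looks like with `pydantic-settings` (a declared dependency in `pyproject.toml`); the field names mirror `.env.example` and the `settings.*` attributes used in `src/main.py` and `src/router/chat.py`, while the defaults shown are assumptions taken from the configuration table later in this README:

```python
# Sketch of src/settings.py, assuming pydantic-settings v2.
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    APP_NAME: str = "CyGen"
    API_V1_PREFIX: str = "/api/v1"
    MAX_WORKERS: int = 4
    CHUNK_SIZE: int = 512
    CHUNK_OVERLAP: int = 50
    MONGODB_URL: str = "mongodb://localhost:27017"
    MONGODB_DB_NAME: str = "rag_system"
    QDRANT_URL: str = "http://localhost:6333"
    GROQ_API_KEY: str = ""
    TOP_K: int = 5
    RAG_THRESHOLD: float = 0.75
    TEMPERATURE: float = 0.7
    N_LAST_MESSAGE: int = 5

@lru_cache
def get_settings() -> Settings:
    # Cached so every import shares one validated Settings instance.
    return Settings()
```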
206 | ### Running the Application
207 |
208 | #### Option 1: Using the Interactive Launcher Script
209 |
210 | ```bash
211 | chmod +x start.sh
212 | ./start.sh
213 | ```
214 |
215 | The launcher offers the following options:
216 | 1. Start both the FastAPI backend and Streamlit frontend with Docker Compose
217 | 2. Start only the FastAPI backend
218 | 3. Start only the Streamlit frontend (with Docker or locally)
219 |
220 | #### Option 2: Using Docker Compose
221 |
222 | Start all services:
223 | ```bash
224 | docker-compose up --build
225 | ```
226 |
227 | Start only specific services:
228 | ```bash
229 | docker-compose up --build app # Backend only
230 | docker-compose up --build streamlit # Frontend only
231 | ```
232 |
233 | #### Option 3: Running Locally (Development)
234 |
235 | 1. Create and activate a virtual environment:
236 | ```bash
237 | uv venv
238 | source .venv/bin/activate # Linux/macOS
239 | .venv\Scripts\activate # Windows
240 | ```
241 |
242 | 2. Install dependencies:
243 | ```bash
244 | uv pip install -e .
245 | ```
246 |
247 | 3. Start the FastAPI backend:
248 | ```bash
249 | uvicorn src.main:app --reload --port 8000
250 | ```
251 |
252 | 4. Start the Streamlit frontend (in a separate terminal):
253 | ```bash
254 | cd streamlit
255 | ./run.sh # or `streamlit run app.py`
256 | ```
257 |
258 | ### Accessing the Application
259 |
260 | - **Streamlit Frontend**: http://localhost:8501
261 | - **FastAPI Swagger Docs**: http://localhost:8000/docs
262 | - **API Base URL**: http://localhost:8000/api/v1
263 |
264 | ## 📋 Usage Guide
265 |
266 | ### Document Upload
267 |
268 | 1. Navigate to the Streamlit web interface
269 | 2. Click on the "Upload Documents" section in the sidebar
270 | 3. Select a PDF file (limit: 200MB per file)
271 | 4. Click "Process Document"
272 | 5. Wait for the processing to complete (progress will be displayed)
273 |
274 | ### Creating a Conversation
275 |
276 | 1. Click "New Conversation" in the sidebar
277 | 2. A new conversation will be created with a temporary title
278 | 3. The title will be automatically updated based on your first message
279 |
280 | ### Chatting with Your Documents
281 |
282 | 1. Type your question in the chat input
283 | 2. The system will:
284 |    - Retrieve relevant context from your documents
285 |    - Consider your conversation history
286 |    - Generate a comprehensive answer
287 | 3. Continue the conversation with follow-up questions
288 |
289 | ### Managing Conversations
290 |
291 | - All your conversations are saved and accessible from the sidebar
292 | - Select any conversation to continue where you left off
293 | - Conversation history is preserved between sessions
294 |
295 | ## 🔧 API Endpoints
296 |
297 | The system exposes the following key API endpoints:
298 |
299 | ### Documents API
300 |
301 | - `POST /api/v1/documents/upload`: Upload a PDF document
302 | - `GET /api/v1/documents/task/{task_id}`: Check document processing status
303 |
304 | ### Chat API
305 |
306 | - `PUT /api/v1/chat/conversation`: Create a new conversation
307 | - `GET /api/v1/chat/conversations`: List all conversations
308 | - `GET /api/v1/chat/conversations/{conversation_id}`: Get a specific conversation
309 | - `DELETE /api/v1/chat/conversations/{conversation_id}`: Delete a conversation
310 | - `POST /api/v1/chat/{conversation_id}`: Send a message in a conversation
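- `POST /api/v1/chat/{conversation_id}/messages/{message_index}/feedback`: Submit thumbs up/down feedback for a message
- `WS /api/v1/chat/ws/{conversation_id}`: WebSocket endpoint for real-time chat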
311 |
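For a quick end-to-end check, the endpoints above can be exercised with `httpx`, which is already a project dependency. In this sketch the multipart field name `"file"` and the `task_id` response key are assumptions, since `src/router/upload.py` is not reproduced in this section; verify the exact schema at `/docs`:

```python
import httpx

BASE = "http://localhost:8000/api/v1"

with httpx.Client(timeout=120) as client:
    # Upload a PDF, then poll its background processing task.
    with open("report.pdf", "rb") as pdf:
        task = client.post(f"{BASE}/documents/upload", files={"file": pdf}).json()
    status = client.get(f"{BASE}/documents/task/{task['task_id']}").json()
    print("processing status:", status)

    # Create a conversation, then chat in it.
    conv = client.put(f"{BASE}/chat/conversation").json()
    reply = client.post(
        f"{BASE}/chat/{conv['id']}",
        json={"message": "Summarize the uploaded report."},
    ).json()
    print(reply["content"])
```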
312 | ## 📁 Project Structure
313 |
314 | ```
315 | .
316 | ├── assets/                     # Project assets (logo)
317 | ├── docker/                     # Docker configuration files
318 | │   └── entrypoint.sh           # Backend container entrypoint
319 | ├── src/                        # Backend source code
320 | │   ├── router/                 # API route definitions
321 | │   │   ├── chat.py             # Chat endpoints
322 | │   │   └── upload.py           # Document upload endpoints
323 | │   ├── utils/                  # Utility modules
324 | │   │   ├── background_tasks.py # Background task queue
325 | │   │   ├── llm.py              # LLM integration
326 | │   │   ├── pdf_processor.py    # PDF processing
327 | │   │   ├── text_chunking.py    # Text chunking
328 | │   │   └── vector_store.py     # Vector database interface
329 | │   ├── main.py                 # FastAPI application entry
330 | │   └── settings.py             # Application settings
331 | ├── streamlit/                  # Streamlit frontend
332 | │   ├── app.py                  # Main Streamlit application
333 | │   ├── config.toml             # Streamlit configuration
334 | │   └── utils.py                # Frontend utilities
335 | ├── uploads/                    # Uploaded documents storage (created at runtime)
336 | ├── logs/                       # Application logs (created at runtime)
337 | ├── .env.example                # Example environment variables
338 | ├── docker-compose.yml          # Docker Compose configuration
339 | ├── Dockerfile                  # Backend Dockerfile
340 | ├── pyproject.toml              # Python project configuration
341 | ├── start.sh                    # Interactive launcher script
342 | ├── uv.lock                     # Locked dependency versions
343 | └── README.md                   # Project documentation
344 | ```
345 |
346 | ## 🛠️ Configuration Options
347 |
348 | The system can be configured through environment variables:
349 |
350 | | Variable | Description | Default |
351 | |----------|-------------|---------|
352 | | `GROQ_API_KEY` | Groq API key for LLM integration | - |
353 | | `MONGODB_URL` | MongoDB connection string | mongodb://localhost:27017 |
354 | | `MONGODB_DB_NAME` | MongoDB database name | rag_system |
355 | | `QDRANT_URL` | Qdrant server URL | http://localhost:6333 |
356 | | `MAX_WORKERS` | Maximum worker threads for PDF processing | 4 |
357 | | `CHUNK_SIZE` | Target chunk size for document splitting | 512 |
358 | | `CHUNK_OVERLAP` | Overlap between consecutive chunks | 50 |
359 | | `TOP_K` | Number of chunks to retrieve per query | 5 |
360 | | `RAG_THRESHOLD` | Similarity threshold for relevance | 0.75 |
361 | | `TEMPERATURE` | LLM temperature setting | 0.7 |
362 | | `N_LAST_MESSAGE` | Number of previous messages to include | 5 |
363 |
364 | ## 🤝 Contributing
365 |
366 | Contributions are welcome! Here's how you can help:
367 |
368 | 1. Fork the repository
369 | 2. Create a feature branch: `git checkout -b feature/amazing-feature`
370 | 3. Commit your changes: `git commit -m 'Add amazing feature'`
371 | 4. Push to the branch: `git push origin feature/amazing-feature`
372 | 5. Open a pull request
373 |
374 | Please ensure your code follows our style guidelines and includes appropriate tests.
375 |
376 | ## 📝 License
377 |
378 | This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
379 |
380 | ## 📧 Contact
381 |
382 | Project Link: [https://github.com/NnA301023/cygen](https://github.com/NnA301023/cygen)
383 |
384 | ## 🌐 Connect With Us
385 |
386 | ### Our Platforms
387 | - **Magazine**: [ITSec Buzz](https://itsec.buzz/)
388 | - **Engineering Space**: [ITSec Asia Tech](https://www.itsecasia.tech/)
389 |
390 | ### Social Media
391 | - [Instagram](https://www.instagram.com/rndforge.official/)
392 | - [TikTok](https://www.tiktok.com/@rndforge)
393 | - [Threads](https://www.threads.net/@rndforge.official)
394 | - [YouTube](https://www.youtube.com/@rndforgeofficial)
395 |
396 | ---
397 |
398 |
399 | Built with ❤️ by RnD Team
400 |
--------------------------------------------------------------------------------
/assets/cygen logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ITSEC-Research/cygen/12682278a30ca52cd5e159907a49b42540ed6486/assets/cygen logo.png
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | app:
3 | build: .
4 | ports:
5 | - "8000:8000"
6 | volumes:
7 | - .:/app
8 | - ./uploads:/app/uploads
9 | - ./logs:/app/logs
10 | environment:
11 | - ENVIRONMENT=development # Change to 'production' for production mode
12 | - MAX_WORKERS=4
13 | - PYTHONPATH=/app
14 | env_file:
15 | - .env
16 | depends_on:
17 | - mongodb
18 | - qdrant
19 | develop:
20 | watch:
21 |         - path: ./src
22 |           target: /app/src
23 | action: sync
24 |
25 | streamlit:
26 | build:
27 | context: .
28 | dockerfile: streamlit/Dockerfile
29 | ports:
30 | - "8501:8501"
31 | volumes:
32 | - ./streamlit:/app/streamlit
33 | - ./uploads:/app/uploads
34 | - ./logs:/app/logs
35 | environment:
36 | - API_URL=http://app:8000
37 | - WS_URL=ws://app:8000
38 | - PYTHONPATH=/app
39 | - STREAMLIT_SERVER_PORT=8501
40 | - STREAMLIT_SERVER_HEADLESS=true
41 | - STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
42 | - MONGODB_URL=mongodb://mongodb:27017
43 | env_file:
44 | - .env
45 | depends_on:
46 | - app
47 | restart: unless-stopped
48 | healthcheck:
49 | test: ["CMD", "curl", "--fail", "http://localhost:8501/_stcore/health"]
50 | interval: 30s
51 | timeout: 10s
52 | retries: 3
53 | start_period: 5s
54 | develop:
55 | watch:
56 | - path: ./streamlit
57 | target: /app/streamlit
58 | action: sync
59 |
60 | mongodb:
61 | image: mongo:latest
62 | ports:
63 | - "27017:27017"
64 | volumes:
65 | - mongodb_data:/data/db
66 | environment:
67 | - MONGO_INITDB_DATABASE=rag_system
68 |
69 | qdrant:
70 | image: qdrant/qdrant:latest
71 | ports:
72 | - "6333:6333"
73 | - "6334:6334"
74 | volumes:
75 | - qdrant_data:/qdrant/storage
76 | environment:
77 |       - QDRANT__SERVICE__API_KEY=${QDRANT_API_KEY}
78 |
79 | volumes:
80 | mongodb_data:
81 | qdrant_data:
--------------------------------------------------------------------------------
/docker/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | # Check runtime directory
5 | echo "Current directory contents:"
6 | ls -la
7 |
8 | # Activate virtual environment
9 | . .venv/bin/activate
10 |
11 | # Install dependencies in development mode
12 | if [ "$ENVIRONMENT" = "development" ]; then
13 | echo "Installing dependencies in development mode..."
14 | uv pip install -e .
15 | else
16 | echo "Installing dependencies in production mode..."
17 | uv pip install .
18 | fi
19 |
20 | # Run the application with hot reload in development
21 | if [ "$ENVIRONMENT" = "development" ]; then
22 | echo "Starting server in development mode with hot reload..."
23 | exec uvicorn src.main:app --host 0.0.0.0 --port 8000 --reload --reload-dir /app/src
24 | else
25 | echo "Starting server in production mode..."
26 | exec uvicorn src.main:app --host 0.0.0.0 --port 8000 --workers $MAX_WORKERS
27 | fi
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "cygen"
3 | version = "0.1.0"
4 | description = "Advanced RAG System with Groq LLM"
5 | readme = "README.md"
6 | requires-python = ">=3.11"
7 | dependencies = [
8 |
9 | # FastAPI and Server
10 | "fastapi>=0.104.0",
11 | "uvicorn[standard]>=0.24.0",
12 | "websockets>=12.0",
13 | "python-multipart>=0.0.6",
14 | "pydantic>=2.5.0",
15 | "pydantic-settings>=2.1.0",
16 |
17 | # Database
18 | "motor>=3.3.0",
19 | "qdrant-client>=1.7.0",
20 |
21 | # PDF Processing
22 | "PyPDF2>=3.0.0",
23 |
24 | # LLM and Embeddings
25 | "groq>=0.4.0",
26 | "fastembed>=0.2.0",
27 | "langchain>=0.1.9",
28 |
29 | # Utilities
30 | "python-dotenv>=1.0.0",
31 | "tenacity>=8.2.3",
32 | "loguru>=0.7.2",
33 | "aiofiles>=23.2.1",
34 |
35 | # Streamlit UI
36 | "streamlit>=1.31.0",
37 | "streamlit-chat>=0.1.1",
38 | "httpx>=0.25.2",
39 | "watchdog>=3.0.0",
40 | "websocket-client>=1.8.0",
41 | ]
42 |
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI, HTTPException
2 | from fastapi.middleware.cors import CORSMiddleware
3 | from fastapi.responses import JSONResponse
4 | from contextlib import asynccontextmanager
5 | from loguru import logger
6 | import asyncio
7 |
8 | from .settings import get_settings
9 | from .router import upload, chat
10 |
11 | # Load settings
12 | settings = get_settings()
13 |
14 | # Configure logger
15 | logger.add(
16 | "logs/app.log",
17 | rotation="500 MB",
18 | retention="10 days",
19 | level="INFO"
20 | )
21 |
22 | @asynccontextmanager
23 | async def lifespan(app: FastAPI):
24 | """
25 | Lifespan context manager for the FastAPI application.
26 | Handles startup and shutdown events.
27 | """
28 |
29 | # Service Startup...
30 | app.state.max_workers = settings.MAX_WORKERS
31 | app.state.processing_semaphore = asyncio.Semaphore(settings.MAX_WORKERS)
32 | logger.info(f"Server starting with {settings.MAX_WORKERS} workers")
33 |
34 | yield # Server is running
35 |
36 | # Service Shutdown
37 | logger.info("Server shutting down")
38 |
39 | # Initialize FastAPI app
40 | app = FastAPI(
41 | title=settings.APP_NAME,
42 | description="Advanced RAG System with Groq LLM",
43 | version="1.0.0",
44 | docs_url="/docs",
45 | redoc_url="/redoc",
46 | lifespan=lifespan
47 | )
48 |
49 | # Configure CORS
50 | app.add_middleware(
51 | CORSMiddleware,
52 | allow_origins=["*"],
53 | allow_credentials=True,
54 | allow_methods=["*"],
55 | allow_headers=["*"],
56 | )
57 |
58 | # Include routers
59 | app.include_router(
60 | upload.router,
61 | prefix=f"{settings.API_V1_PREFIX}/documents",
62 | tags=["Document Processing"]
63 | )
64 |
65 | app.include_router(
66 | chat.router,
67 | prefix=f"{settings.API_V1_PREFIX}/chat",
68 | tags=["Chat"]
69 | )
70 |
71 | # Root status endpoint
72 | @app.get("/", include_in_schema=False)
73 | async def root_handler():
74 |     """Report service status, worker count, and version."""
75 | return {
76 | "status": "healthy",
77 | "workers": settings.MAX_WORKERS,
78 | "version": "1.0.0"
79 | }
80 |
81 | # Health check endpoint
82 | @app.get("/health", include_in_schema=False)
83 | async def health_check():
84 | """Health check endpoint."""
85 | return {"status": "healthy"}
86 |
87 | # Error handlers
88 | @app.exception_handler(HTTPException)
89 | async def http_exception_handler(request, exc):
90 | """Handle HTTP exceptions."""
91 | return JSONResponse(
92 | status_code=exc.status_code,
93 | content={"detail": exc.detail}
94 | )
95 |
96 | @app.exception_handler(Exception)
97 | async def general_exception_handler(request, exc):
98 | """Handle general exceptions."""
99 | logger.exception("Unhandled exception")
100 | return JSONResponse(
101 | status_code=500,
102 | content={"detail": "Internal server error"}
103 | )
--------------------------------------------------------------------------------
/src/router/__init__.py:
--------------------------------------------------------------------------------
1 | """Router package for the FastAPI application."""
2 |
--------------------------------------------------------------------------------
/src/router/chat.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, List
2 | import json
3 | import uuid
4 | import traceback
5 | from datetime import datetime
6 |
7 | from fastapi import APIRouter, WebSocket, WebSocketDisconnect, HTTPException
8 | from pydantic import BaseModel
9 | from loguru import logger
10 | from motor.motor_asyncio import AsyncIOMotorClient
11 |
12 | from ..settings import get_settings
13 | from ..utils.vector_store import VectorStore
14 | from ..utils.llm import GroqLLM
15 |
16 | settings = get_settings()
17 |
18 | router = APIRouter()
19 |
20 | # Initialize services
21 | vector_store = VectorStore()
22 | llm = GroqLLM()
23 |
24 | class ChatMessage(BaseModel):
25 | """Chat message model."""
26 | role: str
27 | content: str
28 | timestamp: datetime = None
29 | feedback: Dict[str, Any] = {
30 | "thumbs": None, # "up" or "down"
31 | "comment": None, # Optional feedback comment
32 | "submitted_at": None # Timestamp when feedback was submitted
33 | }
34 |
35 | def to_dict(self) -> Dict[str, Any]:
36 | """Convert message to dictionary with ISO formatted timestamp."""
37 | data = self.model_dump()
38 | if self.timestamp:
39 | data["timestamp"] = self.timestamp.isoformat()
40 | if self.feedback and self.feedback.get("submitted_at"):
41 | data["feedback"]["submitted_at"] = self.feedback["submitted_at"].isoformat()
42 | return data
43 |
44 | class ConversationResponse(BaseModel):
45 | """Response model for conversation operations."""
46 | id: str
47 | title: str
48 | metadata: Dict[str, Any] = {}
49 | created_at: datetime
50 | updated_at: datetime = None
51 | message_count: int = 0
52 | last_message: ChatMessage | None = None
53 |
54 | class Conversation(BaseModel):
55 | """Conversation model."""
56 | id: str
57 | title: str
58 | messages: List[ChatMessage]
59 | metadata: Dict[str, Any] = {}
60 | created_at: datetime = None
61 | updated_at: datetime = None
62 |
63 | class ConnectionManager:
64 | """Manages WebSocket connections."""
65 |
66 | def __init__(self):
67 | self.active_connections: Dict[str, WebSocket] = {}
68 | self.mongo_client = AsyncIOMotorClient(settings.MONGODB_URL)
69 | self.db = self.mongo_client[settings.MONGODB_DB_NAME]
70 |
71 | async def connect(self, websocket: WebSocket, conversation_id: str):
72 | """Connect a new client."""
73 | # Verify conversation exists
74 | conversation = await self.get_conversation_history(conversation_id)
75 | if not conversation:
76 | raise HTTPException(status_code=404, detail="Conversation not found")
77 |
78 | await websocket.accept()
79 | self.active_connections[conversation_id] = websocket
80 | logger.info(f"Client connected to conversation {conversation_id}")
81 |
82 | def disconnect(self, conversation_id: str):
83 | """Disconnect a client."""
84 | if conversation_id in self.active_connections:
85 | del self.active_connections[conversation_id]
86 | logger.info(f"Client disconnected from conversation {conversation_id}")
87 |
88 | async def send_message(self, conversation_id: str, message: ChatMessage):
89 | """Send a message to a specific client."""
90 | if conversation_id in self.active_connections:
91 | websocket = self.active_connections[conversation_id]
92 | await websocket.send_json(message.to_dict())
93 |
94 | async def get_conversation_history(self, conversation_id: str) -> Conversation:
95 | """Get conversation history from MongoDB."""
96 | conversation = await self.db.conversations.find_one({"id": conversation_id})
97 | if conversation:
98 | return Conversation(**conversation)
99 | return None
100 |
101 | async def save_message(self, conversation_id: str, message: ChatMessage):
102 | """Save a message to conversation history."""
103 | now = datetime.utcnow()
104 | message.timestamp = now
105 |
106 | # Update or create conversation
107 | await self.db.conversations.update_one(
108 | {"id": conversation_id},
109 | {
110 | "$push": {"messages": message.to_dict()},
111 | "$set": {"updated_at": now},
112 | "$setOnInsert": {
113 | "id": conversation_id,
114 | "created_at": now,
115 | "metadata": {}
116 | }
117 | },
118 | upsert=True
119 | )
120 |
121 | async def update_title(self, conversation_id: str, title: str):
122 | """Update conversation title."""
123 | await self.db.conversations.update_one(
124 | {"id": conversation_id},
125 | {"$set": {"title": title}}
126 | )
127 | logger.info(f"Updated title for conversation {conversation_id}: {title}")
128 |
129 | # Initialize connection manager
130 | manager = ConnectionManager()
131 |
132 | class ChatRequest(BaseModel):
133 | """Chat request model."""
134 | message: str
135 |
136 | class ChatResponse(BaseModel):
137 | """Chat response model."""
138 | role: str
139 | content: str
140 | timestamp: datetime = None
141 |
142 | class FeedbackRequest(BaseModel):
143 | """Feedback request model."""
144 | thumbs: str # "up" or "down"
145 | comment: str | None = None
146 |
147 | @router.put("/conversation", response_model=ConversationResponse)
148 | async def create_conversation():
149 | """Create a new conversation with temporary title."""
150 | conversation_id = str(uuid.uuid4())
151 | now = datetime.utcnow()
152 |
153 | # Use a temporary title that will be updated with the first message
154 | title = "New Conversation"
155 |
156 | # System metadata
157 | metadata = {
158 | "created_by": "system",
159 | "created_at_timestamp": now.timestamp(),
160 | "source": "api",
161 | "title_generated": False # Flag to track if title has been generated
162 | }
163 |
164 | conversation_data = {
165 | "id": conversation_id,
166 | "title": title,
167 | "metadata": metadata,
168 | "messages": [],
169 | "created_at": now,
170 | "updated_at": now
171 | }
172 |
173 | await manager.db.conversations.insert_one(conversation_data)
174 | logger.info(f"Created conversation {conversation_id}")
175 |
176 | return ConversationResponse(
177 | id=conversation_id,
178 | title=title,
179 | metadata=metadata,
180 | created_at=now,
181 | updated_at=now,
182 | message_count=0
183 | )
184 |
185 | @router.get("/conversations", response_model=List[ConversationResponse])
186 | async def list_conversations(skip: int = 0, limit: int = 10):
187 | """List all conversations."""
188 | conversations = []
189 | cursor = manager.db.conversations.find().sort("updated_at", -1).skip(skip).limit(limit)
190 |
191 | async for conv in cursor:
192 | last_message = None
193 | messages = conv.get("messages", [])
194 | if messages:
195 | last_message = ChatMessage(**messages[-1])
196 | conversations.append(ConversationResponse(
197 | id=conv["id"],
198 | title=conv.get("title", "New Conversation"),
199 | metadata=conv.get("metadata", {}),
200 | created_at=conv["created_at"],
201 | updated_at=conv.get("updated_at"),
202 | message_count=len(messages),
203 | last_message=last_message
204 | ))
205 |
206 | return conversations
207 |
208 | @router.get("/conversations/{conversation_id}", response_model=Conversation)
209 | async def get_conversation(conversation_id: str):
210 | """Get a specific conversation."""
211 | conversation = await manager.get_conversation_history(conversation_id)
212 | if not conversation:
213 | raise HTTPException(status_code=404, detail="Conversation not found")
214 | return conversation
215 |
216 | @router.delete("/conversations/{conversation_id}")
217 | async def delete_conversation(conversation_id: str):
218 | """Delete a conversation."""
219 | result = await manager.db.conversations.delete_one({"id": conversation_id})
220 | if result.deleted_count == 0:
221 | raise HTTPException(status_code=404, detail="Conversation not found")
222 |
223 | # Disconnect any active WebSocket connections
224 | manager.disconnect(conversation_id)
225 | return {"status": "success", "message": "Conversation deleted"}
226 |
227 | async def generate_title(message: str) -> str:
228 | """Generate a concise title from the first message using LLM."""
229 | try:
230 | system_prompt = """You are a helpful assistant that generates concise conversation titles.
231 | Create a brief, descriptive title (maximum 6 words) based on the user's first message.
232 | The title should capture the main topic or intent. Respond with ONLY the title, no other text."""
233 |
234 | response = await llm.chat_completion(
235 | messages=[
236 | {"role": "system", "content": system_prompt},
237 | {"role": "user", "content": f"Generate a title for this conversation that starts with: {message}"}
238 | ],
239 | temperature=settings.TEMPERATURE,
240 | max_tokens=25
241 | )
242 |
243 | # Clean up the response
244 | title = response.strip("'").strip('"').strip()
245 | return title
246 |
247 | except Exception as e:
248 | traceback.print_exc()
249 | logger.error(f"Error generating title: {str(e)}")
250 | return "New Conversation" # Fallback title
251 |
252 | @router.websocket("/ws/{conversation_id}")
253 | async def chat_websocket(websocket: WebSocket, conversation_id: str):
254 | """WebSocket endpoint for chat."""
255 | try:
256 | await manager.connect(websocket, conversation_id)
257 |
258 | # Load conversation history
259 | logger.info("Retrieve Conv. History")
260 | conversation = await manager.get_conversation_history(conversation_id)
261 |
262 |         # NOTE: What is this for?
263 | # if conversation:
264 | # for message in conversation.messages:
265 | # await manager.save_message(conversation_id, message)
266 |
267 | while True:
268 |
269 | # Receive message from client
270 | data = await websocket.receive_json()
271 | user_message = ChatMessage(
272 | role="user",
273 | content=data["message"]
274 | )
275 |
276 | # Save user message
277 | await manager.save_message(conversation_id, user_message)
278 |
279 | try:
280 | # Generate title if this is the first message
281 | logger.info("Generate Title...")
282 | if not conversation or (not conversation.messages and not conversation.metadata.get("title_generated")):
283 | title = await generate_title(user_message.content)
284 | await manager.update_title(conversation_id, title)
285 | await manager.db.conversations.update_one(
286 | {"id": conversation_id},
287 | {"$set": {"metadata.title_generated": True}}
288 | )
289 |
290 | # Get relevant context from vector store
291 | logger.info("Retrieve Relevant Context...")
292 | context = await vector_store.similarity_search(
293 | query=user_message.content,
294 | k=settings.TOP_K
295 | )
296 |
297 | # Determine if this is a basic conversation or needs context
298 | logger.info("Determine Route Conversation (Basic / RAG)")
299 | is_basic_conversation = len(context) == 0 or all(c['score'] < settings.RAG_THRESHOLD for c in context)
300 |
301 | # Prepare conversation context
302 | conversation_context = ["Conversation History:"]
303 | if conversation:
304 | logger.info(conversation.messages)
305 |                     for message in conversation.messages[-settings.N_LAST_MESSAGE:]:  # only the last N messages
306 | conversation_context.append(f"{message.role}: {message.content}")
307 | conversation_context = "\n".join(conversation_context)
308 |
309 | # Select appropriate system prompt based on query type
310 | if is_basic_conversation:
311 | system_prompt = """
312 |                     Answer according to the user's language, and consider the conversation history when it helps answer the question.
313 | You are a helpful and friendly AI assistant.
314 | Engage in natural conversation and provide accurate, concise responses.
315 | If the user mentions something vague or unclear, politely ask for clarification or context
316 | to ensure you provide the most relevant and helpful answer.
317 | If the user refers to specific documents or information,
318 | let them know you can search through the knowledge base to assist them.
319 | """
320 |
321 | messages = [
322 | {"role": "system", "content": system_prompt},
323 | {"role": "user", "content": conversation_context},
324 | {"role": "user", "content": user_message.content}
325 | ]
326 | else:
327 | system_prompt = """
328 |                     Answer according to the user's language, and consider the conversation history when it helps answer the question.
329 | You are a helpful AI assistant with access to a knowledge base of documents.
330 | Use the provided context to answer questions accurately and comprehensively.
331 |
332 | For each response:
333 | 1. Analyze the provided context and cite specific sources using page numbers
334 | 2. Structure your response to clearly separate information from different sources
335 | 3. When citing information, use the format: [Source: filename, Page: X]
336 | 4. If multiple sources support a point, cite all relevant sources
337 | 5. If the context doesn't fully address the question, clearly state what information is from the sources and what is general knowledge
338 |
339 | Always maintain accuracy over completeness. If you're unsure about something, acknowledge your uncertainty and explain what evidence you do have from the sources.
340 |
341 | Remember to:
342 | - Provide page numbers for all cited information
343 | - Distinguish between direct quotes and paraphrased content
344 | - Note any conflicting information between sources
345 | - Be transparent about gaps in the provided context
346 | """
347 | context_knowledge = [f"{cont['text']}\nSource: {cont['file_path']} - Page Number: {cont['page_number']}" for cont in context]
348 | context_knowledge = "\n".join(context_knowledge)
349 | messages = [
350 | {"role": "system", "content": system_prompt},
351 | {"role": "user", "content": conversation_context},
352 | {
353 | "role": "user",
354 | "content": f"""
355 | Context: {context_knowledge}
356 | Question: {user_message.content}
357 | """}
358 | ]
359 |
360 | # Generate response using LLM
361 | logger.info("LLM Generate Response...")
362 | logger.info(f"Message Throw: {messages}")
363 | response = await llm.chat_completion(
364 | messages=messages,
365 | temperature=settings.TEMPERATURE
366 | )
367 |
368 | # Create assistant message
369 | assistant_message = ChatMessage(
370 | role="assistant",
371 | content=response
372 | )
373 | logger.info(f"Generated response for {'basic' if is_basic_conversation else 'context-based'} query")
374 | logger.info(assistant_message.to_dict())
375 |
376 | # Save assistant message
377 | await manager.save_message(conversation_id, assistant_message)
378 |
379 | # Send response to client
380 | await manager.send_message(conversation_id, assistant_message)
381 |
382 | except Exception as e:
383 | traceback.print_exc()
384 | logger.error(f"Error processing message: {str(e)}")
385 | error_message = ChatMessage(
386 | role="system",
387 | content="I apologize, but I encountered an error processing your message."
388 | )
389 | await manager.send_message(conversation_id, error_message)
390 |
391 | except WebSocketDisconnect:
392 | traceback.print_exc()
393 | manager.disconnect(conversation_id)
394 |
395 | except Exception as e:
396 | traceback.print_exc()
397 | logger.error(f"WebSocket error: {str(e)}")
398 | manager.disconnect(conversation_id)
399 |
400 | @router.post("/{conversation_id}", response_model=ChatResponse)
401 | async def chat_post(conversation_id: str, request: ChatRequest):
402 | """POST endpoint for chat - mirrors WebSocket functionality."""
403 | try:
404 | # Verify conversation exists
405 | conversation = await manager.get_conversation_history(conversation_id)
406 | if not conversation:
407 | raise HTTPException(status_code=404, detail="Conversation not found")
408 |
409 | # Create user message
410 | user_message = ChatMessage(
411 | role="user",
412 | content=request.message
413 | )
414 |
415 | # Save user message
416 | await manager.save_message(conversation_id, user_message)
417 |
418 | try:
419 | # Generate title if this is the first message
420 | logger.info("Generate Title...")
421 | if not conversation or (not conversation.messages and not conversation.metadata.get("title_generated")):
422 | title = await generate_title(user_message.content)
423 | await manager.update_title(conversation_id, title)
424 | await manager.db.conversations.update_one(
425 | {"id": conversation_id},
426 | {"$set": {"metadata.title_generated": True}}
427 | )
428 |
429 | # Get relevant context from vector store
430 | logger.info("Retrieve Relevant Context...")
431 | context = await vector_store.similarity_search(
432 | query=user_message.content,
433 | k=settings.TOP_K
434 | )
435 |
436 | # Determine if this is a basic conversation or needs context
437 | logger.info("Determine Route Conversation (Basic / RAG)")
438 | is_basic_conversation = len(context) == 0 or all(c['score'] < settings.RAG_THRESHOLD for c in context)
439 |
440 | # Prepare conversation context
441 | conversation_context = ["Conversation History:"]
442 | if conversation:
443 |                 for message in conversation.messages[-settings.N_LAST_MESSAGE:]:  # only the last N messages
444 | conversation_context.append(f"{message.role}: {message.content}")
445 | conversation_context = "\n".join(conversation_context)
446 |
447 | # Select appropriate system prompt based on query type
448 | if is_basic_conversation:
449 | system_prompt = """
450 |                 Answer in the user's language, and take the conversation history into account when it helps answer the question.
451 | You are a helpful and friendly AI assistant.
452 | Engage in natural conversation and provide accurate, concise responses.
453 | If the user mentions something vague or unclear, politely ask for clarification or context
454 | to ensure you provide the most relevant and helpful answer.
455 | If the user refers to specific documents or information,
456 | let them know you can search through the knowledge base to assist them.
457 | """
458 |
459 | messages = [
460 | {"role": "system", "content": system_prompt},
461 | {"role": "user", "content": conversation_context},
462 | {"role": "user", "content": user_message.content}
463 | ]
464 | else:
465 | system_prompt = """
466 |                 Answer in the user's language, and take the conversation history into account when it helps answer the question.
467 | You are a helpful AI assistant with access to a knowledge base of documents.
468 | Use the provided context to answer questions accurately and comprehensively.
469 | If the context doesn't fully address the question, acknowledge what you know from the context
470 | and what you're unsure about. Always maintain accuracy over completeness.
471 | """
472 |
473 | messages = [
474 | {"role": "system", "content": system_prompt},
475 | {"role": "user", "content": conversation_context},
476 | {"role": "user", "content": f"""Context: {json.dumps([c['text'] for c in context])}
477 | Question: {user_message.content}"""}
478 | ]
479 |
480 | # Generate response using LLM
481 |             logger.info("Generating LLM response...")
482 |             logger.info(f"Messages sent to the LLM: {messages}")
483 | response = await llm.chat_completion(
484 | messages=messages,
485 | temperature=settings.TEMPERATURE
486 | )
487 |
488 | # Create assistant message
489 | assistant_message = ChatMessage(
490 | role="assistant",
491 | content=response
492 | )
493 | logger.info(f"Generated response for {'basic' if is_basic_conversation else 'context-based'} query")
494 | logger.info(assistant_message.to_dict())
495 |
496 | # Save assistant message
497 | await manager.save_message(conversation_id, assistant_message)
498 |
499 | # Return the response
500 | return ChatResponse(
501 | role=assistant_message.role,
502 | content=assistant_message.content,
503 | timestamp=assistant_message.timestamp
504 | )
505 |
506 | except Exception as e:
507 | traceback.print_exc()
508 | logger.error(f"Error processing message: {str(e)}")
509 | raise HTTPException(
510 | status_code=500,
511 | detail="An error occurred while processing your message"
512 | )
513 |
514 | except HTTPException:
515 | raise
516 | except Exception as e:
517 | traceback.print_exc()
518 | logger.error(f"Chat error: {str(e)}")
519 | raise HTTPException(
520 | status_code=500,
521 | detail="An unexpected error occurred"
522 | )
523 |
524 | @router.post("/{conversation_id}/messages/{message_index}/feedback")
525 | async def submit_feedback(conversation_id: str, message_index: int, feedback: FeedbackRequest):
526 | """Submit feedback for a specific message in a conversation."""
527 | try:
528 | # Get conversation
529 | conversation = await manager.get_conversation_history(conversation_id)
530 | if not conversation:
531 | raise HTTPException(status_code=404, detail="Conversation not found")
532 |
533 | # Validate message index
534 | if message_index < 0 or message_index >= len(conversation.messages):
535 | raise HTTPException(status_code=404, detail="Message not found")
536 |
537 | # Update feedback in the message
538 | now = datetime.utcnow()
539 | await manager.db.conversations.update_one(
540 | {"id": conversation_id},
541 | {"$set": {
542 | f"messages.{message_index}.feedback": {
543 | "thumbs": feedback.thumbs,
544 | "comment": feedback.comment,
545 | "submitted_at": now
546 | }
547 | }}
548 | )
549 |
550 | return {"status": "success", "message": "Feedback submitted successfully"}
551 |
552 | except HTTPException:
553 | raise
554 | except Exception as e:
555 | logger.error(f"Error submitting feedback: {str(e)}")
556 | raise HTTPException(status_code=500, detail="Failed to submit feedback")
--------------------------------------------------------------------------------
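
A minimal client sketch of the POST flow above, assuming the API runs at http://localhost:8000 with the default /api/v1 prefix; the conversation is created first via the PUT /chat/conversation route the Streamlit client uses, since chat_post returns 404 for unknown IDs.

import asyncio

import httpx

API = "http://localhost:8000/api/v1"

async def ask(question: str) -> str:
    async with httpx.AsyncClient(timeout=60.0) as client:
        # chat_post returns 404 unless the conversation already exists.
        conv = (await client.put(f"{API}/chat/conversation")).json()
        resp = await client.post(f"{API}/chat/{conv['id']}", json={"message": question})
        resp.raise_for_status()
        # The body mirrors ChatResponse: role, content, timestamp.
        return resp.json()["content"]

if __name__ == "__main__":
    print(asyncio.run(ask("What do the uploaded documents cover?")))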
/src/router/upload.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import uuid
3 | import traceback
4 | from typing import List
5 | from datetime import datetime
6 |
7 | from fastapi import APIRouter, UploadFile, File, BackgroundTasks, HTTPException
8 | from pydantic import BaseModel
9 | import aiofiles
10 | from loguru import logger
11 |
12 | from ..settings import get_settings
13 | from ..utils.background_tasks import BackgroundTaskManager
14 |
15 | settings = get_settings()
16 | router = APIRouter()
17 | task_manager = BackgroundTaskManager()
18 |
19 | # Ensure upload directory exists
20 | upload_dir = Path(settings.PDF_UPLOAD_DIR)
21 | upload_dir.mkdir(exist_ok=True)
22 |
23 | class TaskResponse(BaseModel):
24 | """Response model for upload tasks."""
25 | task_id: str
26 | file_name: str
27 | status: str
28 | created_at: datetime
29 |
30 | @router.post("/upload", response_model=TaskResponse)
31 | async def upload_single_file(
32 | file: UploadFile = File(...),
33 | background_tasks: BackgroundTasks = None
34 | ):
35 | """
36 | Upload a single PDF file for processing.
37 |
38 | Args:
39 | file: PDF file to upload
40 | background_tasks: FastAPI background tasks
41 |
42 | Returns:
43 | TaskResponse: Task information
44 | """
45 | try:
46 | # Validate file type
47 | if not file.filename.lower().endswith('.pdf'):
48 | raise HTTPException(
49 | status_code=400,
50 | detail="Only PDF files are allowed"
51 | )
52 |
53 | # Generate unique filename
54 | file_id = str(uuid.uuid4())
55 | safe_filename = f"{file_id}_{file.filename}"
56 | file_path = upload_dir / safe_filename
57 |
58 | # Save file
59 | async with aiofiles.open(file_path, 'wb') as f:
60 | content = await file.read()
61 | await f.write(content)
62 |
63 | # Create task
64 | task_id = str(uuid.uuid4())
65 | task_data = {
66 | "task_id": task_id,
67 | "file_name": file.filename,
68 | "file_path": str(file_path),
69 | "status": "pending",
70 | "created_at": datetime.utcnow()
71 | }
72 |
73 | # Store task in MongoDB
74 | await task_manager.mongo_client[settings.MONGODB_DB_NAME].tasks.insert_one(task_data)
75 |
76 | # Start processing in background
77 | background_tasks.add_task(
78 | task_manager.process_pdf_task,
79 | str(file_path),
80 | task_id
81 | )
82 |
83 | logger.info(f"Started processing task {task_id} for file {file.filename}")
84 | return TaskResponse(**task_data)
85 |
86 |     except HTTPException:
87 |         raise
88 |     except Exception as e:
89 |         traceback.print_exc()
90 |         logger.error(f"Error processing upload: {str(e)}")
91 |         raise HTTPException(status_code=500,
92 |                             detail="Error processing upload")
93 |
94 | @router.post("/upload/batch", response_model=List[TaskResponse])
95 | async def upload_multiple_files(
96 | files: List[UploadFile] = File(...),
97 | background_tasks: BackgroundTasks = None
98 | ):
99 | """
100 | Upload multiple PDF files for processing.
101 |
102 | Args:
103 | files: List of PDF files to upload
104 | background_tasks: FastAPI background tasks
105 |
106 | Returns:
107 | List[TaskResponse]: List of task information
108 | """
109 | responses = []
110 | for file in files:
111 | try:
112 | response = await upload_single_file(file, background_tasks)
113 | responses.append(response)
114 | except HTTPException as e:
115 | logger.warning(f"Skipping file {file.filename}: {str(e)}")
116 | continue
117 |
118 |     if not responses:
119 |         # An empty result is a client error, not a server fault; no traceback needed
120 |         raise HTTPException(
121 |             status_code=400,
122 |             detail="No valid files were uploaded"
123 |         )
124 |
125 | return responses
126 |
127 | @router.get("/task/{task_id}", response_model=TaskResponse)
128 | async def get_task_status(task_id: str):
129 | """
130 | Get the status of a processing task.
131 |
132 | Args:
133 | task_id: Task identifier
134 |
135 | Returns:
136 | TaskResponse: Task information
137 | """
138 | task = await task_manager.mongo_client[settings.MONGODB_DB_NAME].tasks.find_one(
139 | {"task_id": task_id}
140 | )
141 |
142 | if not task:
143 | raise HTTPException(
144 | status_code=404,
145 | detail="Task not found"
146 | )
147 |
148 | return TaskResponse(**task)
--------------------------------------------------------------------------------
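
A companion sketch for the upload flow: post a PDF, then poll the task route until the background pipeline reports a terminal status. The /documents mount prefix is inferred from the Streamlit client rather than from this file, so treat it as an assumption, and the file name is a placeholder.

import time

import httpx

API = "http://localhost:8000/api/v1"

def upload_and_wait(pdf_path: str, poll_interval: float = 2.0) -> dict:
    with open(pdf_path, "rb") as f:
        files = {"file": (pdf_path, f, "application/pdf")}
        resp = httpx.post(f"{API}/documents/upload", files=files)
    resp.raise_for_status()
    task = resp.json()
    # TaskResponse carries task_id / file_name / status / created_at.
    while True:
        task = httpx.get(f"{API}/documents/task/{task['task_id']}").json()
        if task["status"] in ("completed", "failed"):
            return task
        time.sleep(poll_interval)

if __name__ == "__main__":
    print(upload_and_wait("example.pdf"))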
/src/settings.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from pydantic_settings import BaseSettings
3 | from functools import lru_cache
4 |
5 |
6 | class Settings(BaseSettings):
7 | """
8 | Application settings.
9 |
10 | Attributes:
11 | # Server Settings
12 | APP_NAME: Name of the application
13 | DEBUG: Debug mode flag
14 | API_V1_PREFIX: API version 1 prefix
15 |
16 | # Processing Settings
17 | MAX_WORKERS: Maximum number of worker threads for background tasks
18 | CHUNK_SIZE: Size of text chunks for document processing (in tokens)
19 | CHUNK_OVERLAP: Overlap between chunks (in tokens)
20 |
21 | # Database Settings
22 | MONGODB_URL: MongoDB connection URL
23 | MONGODB_DB_NAME: MongoDB database name
24 |
25 | # Vector Store Settings
26 | QDRANT_URL: Qdrant server URL
27 | QDRANT_API_KEY: Qdrant API key
28 | COLLECTION_NAME: Name of the vector collection
29 |
30 | # LLM Settings
31 | GROQ_API_KEY: Groq API key
32 | MODEL_NAME: Name of the Groq model to use
33 | MAX_CONTEXT_LENGTH: Maximum context length for the model
34 | TEMPERATURE: Temperature for LLM responses
35 |
36 | # PDF Processing
37 | OCR_ENABLED: Whether to enable OCR for images in PDFs
38 | PDF_UPLOAD_DIR: Directory to store uploaded PDFs
39 | """
40 |
41 | # Server Settings
42 | APP_NAME: str = "Advanced RAG System"
43 | DEBUG: bool = False
44 | API_V1_PREFIX: str = "/api/v1"
45 |
46 | # Processing Settings
47 | MAX_WORKERS: int = 4
48 | CHUNK_SIZE: int = 512
49 | CHUNK_OVERLAP: int = 50
50 | EMBEDDING_LENGTH: int = 768
51 |
52 | # Database Settings
53 | MONGODB_URL: str
54 | MONGODB_DB_NAME: str = "rag_system"
55 |
56 | # Vector Store Settings
57 | QDRANT_URL: str
58 | QDRANT_API_KEY: Optional[str] = None
59 | COLLECTION_NAME: str = "documents"
60 |
61 | # LLM Settings
62 |     TOP_K: int = 5  # Chunks returned by similarity search
63 |     TOP_K_RERANKER: int = 10  # Candidates to over-fetch when re-ranking
64 |     N_LAST_MESSAGE: int = -5  # Negative slice index: keep the last 5 messages as history
65 |     RAG_THRESHOLD: float = 0.6  # Minimum similarity score before RAG context is used
66 | GROQ_API_KEY: str
67 | MODEL_NAME: str = "mixtral-8x7b-32768" # Groq's Mixtral model
68 | MAX_CONTEXT_LENGTH: int = 8192 # 8k context window
69 | TEMPERATURE: float = 0.7
70 |
71 | # PDF Processing
72 | OCR_ENABLED: bool = True
73 | PDF_UPLOAD_DIR: str = "uploads"
74 |
75 | class Config:
76 | env_file = ".env"
77 | case_sensitive = True
78 |
79 |
80 | @lru_cache()
81 | def get_settings() -> Settings:
82 | """
83 | Get cached settings instance.
84 |
85 | Returns:
86 | Settings: Application settings instance
87 | """
88 | return Settings()
89 |
--------------------------------------------------------------------------------
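
A small sketch of how these settings resolve at runtime, run from the repository root. The required fields (MONGODB_URL, QDRANT_URL, GROQ_API_KEY) have no defaults and must come from the environment or .env; the values below are placeholders. Because get_settings() is wrapped in lru_cache, environment changes after the first call are not picked up.

import os

# Required fields have no defaults, so supply them before the first call.
os.environ.setdefault("MONGODB_URL", "mongodb://localhost:27017")
os.environ.setdefault("QDRANT_URL", "http://localhost:6333")
os.environ.setdefault("GROQ_API_KEY", "dummy-key-for-local-testing")

from src.settings import get_settings

settings = get_settings()
print(settings.CHUNK_SIZE)      # 512 unless overridden via env or .env
print(settings.RAG_THRESHOLD)   # scores below 0.6 route the query to basic chat

# The cache returns the same instance on every subsequent call.
assert get_settings() is settings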
/src/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ITSEC-Research/cygen/12682278a30ca52cd5e159907a49b42540ed6486/src/utils/__init__.py
--------------------------------------------------------------------------------
/src/utils/background_tasks.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Dict, Any
3 | from datetime import datetime
4 | import asyncio
5 | from collections import deque
6 |
7 | from tqdm import tqdm
8 | from loguru import logger
9 | from motor.motor_asyncio import AsyncIOMotorClient
10 | from qdrant_client import QdrantClient
11 | from qdrant_client.models import Distance, VectorParams
12 |
13 | from ..settings import get_settings
14 | from .pdf_processor import PDFProcessor
15 | from .vector_store import VectorStore
16 |
17 | settings = get_settings()
18 |
19 | class BackgroundTaskManager:
20 | """
21 | Manages background tasks for PDF processing and vector storage.
22 |
23 | This class handles:
24 | 1. PDF processing using PDFProcessor
25 | 2. Vector storage in Qdrant with FastEmbed
26 | 3. Task status tracking in MongoDB
27 | 4. Task queuing and concurrency control
28 | """
29 |
30 | def __init__(self):
31 | """Initialize the background task manager."""
32 | self.pdf_processor = PDFProcessor()
33 | self.mongo_client = AsyncIOMotorClient(settings.MONGODB_URL)
34 | self.qdrant_client = QdrantClient(
35 | url=settings.QDRANT_URL,
36 | api_key=settings.QDRANT_API_KEY
37 | )
38 | self.vector_store = VectorStore()
39 | self.task_queue = deque()
40 | self.processing_semaphore = asyncio.Semaphore(settings.MAX_WORKERS)
41 | self.queue_processor_task = None
42 |
43 | # Ensure vector collection exists
44 | self._init_vector_collection()
45 | logger.info("Initialized BackgroundTaskManager")
46 |
47 | def _init_vector_collection(self):
48 | """Initialize the vector collection in Qdrant if it doesn't exist."""
49 | try:
50 | collections = self.qdrant_client.get_collections().collections
51 | collection_exists = any(c.name == settings.COLLECTION_NAME for c in collections)
52 |
53 | if collection_exists:
54 | collection_info = self.qdrant_client.get_collection(settings.COLLECTION_NAME)
55 | if collection_info.config.params.model_dump()["vectors"]["size"] != settings.EMBEDDING_LENGTH:
56 | self.qdrant_client.delete_collection(collection_name=settings.COLLECTION_NAME)
57 | self.qdrant_client.create_collection(
58 | collection_name=settings.COLLECTION_NAME,
59 | vectors_config=VectorParams(
60 | size=settings.EMBEDDING_LENGTH,
61 | distance=Distance.COSINE
62 | )
63 | )
64 | else:
65 | self.qdrant_client.create_collection(
66 | collection_name=settings.COLLECTION_NAME,
67 | vectors_config=VectorParams(
68 | size=settings.EMBEDDING_LENGTH,
69 | distance=Distance.COSINE
70 | )
71 | )
72 | logger.info(f"Created vector collection: {settings.COLLECTION_NAME}")
73 | except Exception as e:
74 | logger.error(f"Error initializing vector collection: {str(e)}")
75 | raise
76 |
77 | async def process_pdf_task(self, file_path: str, task_id: str):
78 | """
79 | Process a PDF file in the background.
80 |
81 | Args:
82 | file_path: Path to the PDF file
83 | task_id: Unique identifier for the task
84 | """
85 | # Add task to queue
86 | self.task_queue.append((file_path, task_id))
87 |
88 | # Start queue processor if not running
89 | if self.queue_processor_task is None or self.queue_processor_task.done():
90 | self.queue_processor_task = asyncio.create_task(self._process_queue())
91 |
92 | logger.info(f"Added task {task_id} to queue for file {file_path}")
93 |
94 | async def _process_queue(self):
95 |         """Drain the queue, processing tasks one at a time under the semaphore."""
96 | while self.task_queue:
97 | async with self.processing_semaphore:
98 | try:
99 | file_path, task_id = self.task_queue.popleft()
100 | await self._process_single_task(file_path, task_id)
101 | except Exception as e:
102 | logger.error(f"Error processing task from queue: {str(e)}")
103 |
104 | async def _process_single_task(self, file_path: str, task_id: str):
105 | """Process a single PDF task."""
106 | try:
107 | # Update task status
108 | await self._update_task_status(task_id, "processing")
109 |
110 | # Process PDF
111 | documents = await self.pdf_processor.process_pdf(file_path)
112 |
113 |             # HTML export placeholder: the directory is created, but no HTML is written yet
114 | output_dir = Path(settings.PDF_UPLOAD_DIR) / "html"
115 | output_dir.mkdir(exist_ok=True)
116 |
117 | html_paths = []
118 | chunk_ids = []
119 |
120 | for document in tqdm(documents, desc="Processing Document"):
121 | # Extract text and metadata
122 | content = document["text"]
123 | metadata = {k: v for k, v in document.items() if k != "text"}
124 |
125 | # Store in vector store
126 | ids = await self.vector_store.add_texts(
127 | texts=[content],
128 | metadatas=[metadata]
129 | )
130 | chunk_ids.extend(ids)
131 |
132 | # Store results
133 | result = {
134 | "status": "completed",
135 | "file_path": file_path,
136 | "html_paths": html_paths,
137 | "chunk_ids": chunk_ids,
138 | "num_pages": len(documents),
139 | "num_chunks": len(chunk_ids),
140 | "completed_at": datetime.utcnow()
141 | }
142 |
143 | await self._update_task_status(task_id, "completed", result)
144 | logger.info(f"Completed task {task_id} with {len(chunk_ids)} chunks")
145 |
146 | except Exception as e:
147 | error_msg = f"Error processing PDF: {str(e)}"
148 | logger.error(error_msg)
149 | await self._update_task_status(task_id, "failed", {"error": error_msg})
150 |
151 | async def _update_task_status(
152 | self,
153 | task_id: str,
154 | status: str,
155 | result: Dict[str, Any] = None
156 | ):
157 | """
158 | Update task status in MongoDB.
159 |
160 | Args:
161 | task_id: Task identifier
162 | status: Current status
163 | result: Optional result data
164 | """
165 | update_data = {
166 | "status": status,
167 | "updated_at": datetime.utcnow()
168 | }
169 | if result:
170 | update_data.update(result)
171 |
172 | await self.mongo_client[settings.MONGODB_DB_NAME].tasks.update_one(
173 | {"task_id": task_id},
174 | {"$set": update_data}
175 | )
--------------------------------------------------------------------------------
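
The queue mechanics above are easiest to see in isolation. This stand-alone sketch reproduces the same pattern with stand-in names: append to a deque, lazily (re)start a single drain task, and guard the actual work with a semaphore. Because the drain loop awaits each task before taking the next, tasks run one at a time; the semaphore only matters if several drain tasks are ever started.

import asyncio
from collections import deque

queue: deque = deque()
semaphore = asyncio.Semaphore(4)  # stands in for settings.MAX_WORKERS
processor: asyncio.Task | None = None

async def handle(item: str) -> None:
    async with semaphore:
        await asyncio.sleep(0.1)  # stands in for _process_single_task
        print(f"processed {item}")

async def drain() -> None:
    while queue:
        await handle(queue.popleft())

async def submit(item: str) -> None:
    global processor
    queue.append(item)
    # Lazily (re)start the single drain task, as process_pdf_task does.
    if processor is None or processor.done():
        processor = asyncio.create_task(drain())

async def main() -> None:
    for i in range(3):
        await submit(f"file-{i}.pdf")
    await processor

asyncio.run(main())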
/src/utils/llm.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict
2 | from loguru import logger
3 | import groq
4 |
5 | from ..settings import get_settings
6 |
7 | settings = get_settings()
8 |
9 | class GroqLLM:
10 | """
11 | Utility class for interacting with Groq's LLM API.
12 | Handles chat completions with proper context management.
13 | """
14 |
15 | def __init__(self):
16 | """Initialize the Groq client."""
17 | self.client = groq.AsyncGroq(api_key=settings.GROQ_API_KEY)
18 | self.model = settings.MODEL_NAME
19 | logger.info(f"Initialized GroqLLM with model: {self.model}")
20 |
21 | async def chat_completion(
22 | self,
23 | messages: List[Dict[str, str]],
24 | temperature: float = None,
25 | max_tokens: int = None
26 | ) -> str:
27 | """
28 | Generate a chat completion response.
29 |
30 | Args:
31 | messages: List of message dictionaries with 'role' and 'content'
32 | temperature: Optional temperature override
33 | max_tokens: Optional max tokens override
34 |
35 | Returns:
36 | str: Generated response text
37 | """
38 | try:
39 | # Calculate approximate token count
40 | total_chars = sum(len(m["content"]) for m in messages)
41 | approx_tokens = total_chars // 4 # Rough estimate
42 |
43 | # Ensure we don't exceed context window
44 |             if max_tokens is None:
45 |                 max_tokens = max(1, min(
46 |                     settings.MAX_CONTEXT_LENGTH - approx_tokens,
47 |                     2048  # Default max response length
48 |                 ))
49 |
50 | # Generate completion
51 | completion = await self.client.chat.completions.create(
52 | model=self.model,
53 | messages=messages,
54 |                 temperature=temperature if temperature is not None else settings.TEMPERATURE,
55 | max_tokens=max_tokens,
56 |                 stream=False  # Non-streaming variant; see stream_chat_completion below
57 | )
58 |
59 | # Extract and return the response text
60 | response = completion.choices[0].message.content
61 | return response.strip()
62 |
63 | except Exception as e:
64 | logger.error(f"Error generating chat completion: {str(e)}")
65 | raise
66 |
67 | async def stream_chat_completion(
68 | self,
69 | messages: List[Dict[str, str]],
70 | temperature: float = None,
71 | max_tokens: int = None
72 | ):
73 | """
74 | Generate a streaming chat completion response.
75 |
76 | Args:
77 | messages: List of message dictionaries with 'role' and 'content'
78 | temperature: Optional temperature override
79 | max_tokens: Optional max tokens override
80 |
81 | Yields:
82 | str: Generated response text chunks
83 | """
84 | try:
85 | # Calculate approximate token count
86 | total_chars = sum(len(m["content"]) for m in messages)
87 | approx_tokens = total_chars // 4 # Rough estimate
88 |
89 | # Ensure we don't exceed context window
90 |             if max_tokens is None:
91 |                 max_tokens = max(1, min(
92 |                     settings.MAX_CONTEXT_LENGTH - approx_tokens,
93 |                     2048  # Default max response length
94 |                 ))
95 |
96 | # Generate streaming completion
97 | stream = await self.client.chat.completions.create(
98 | model=self.model,
99 | messages=messages,
100 |                 temperature=temperature if temperature is not None else settings.TEMPERATURE,
101 | max_tokens=max_tokens,
102 | stream=True
103 | )
104 |
105 | # Yield response chunks
106 | async for chunk in stream:
107 | if chunk.choices[0].delta.content:
108 | yield chunk.choices[0].delta.content
109 |
110 | except Exception as e:
111 | logger.error(f"Error generating streaming chat completion: {str(e)}")
112 | raise
--------------------------------------------------------------------------------
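
Both entry points in one short sketch, run from the repository root. It needs a valid GROQ_API_KEY (plus the other required settings) in the environment, so the prompt text here is only illustrative.

import asyncio

from src.utils.llm import GroqLLM

async def main() -> None:
    llm = GroqLLM()  # model name and API key come from settings
    messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "In one sentence, what does a vector store do?"},
    ]
    # Blocking variant: returns the full response at once.
    print(await llm.chat_completion(messages, temperature=0.2))
    # Streaming variant: yields chunks as the model produces them.
    async for chunk in llm.stream_chat_completion(messages):
        print(chunk, end="", flush=True)
    print()

asyncio.run(main())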
/src/utils/pdf_processor.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict, Any
2 | import asyncio
3 | from concurrent.futures import ThreadPoolExecutor
4 | from PyPDF2 import PdfReader
5 | from loguru import logger
6 |
7 | from ..settings import get_settings
8 | from .text_chunking import chunk_text_recursive
9 | from .vector_store import VectorStore
10 |
11 | settings = get_settings()
12 |
13 | class PDFProcessor:
14 | """
15 | PDF processor that extracts text directly from PDFs.
16 |
17 | This class handles:
18 | 1. PDF text extraction
19 | 2. Text chunking for vector storage
20 | 3. Background processing with configurable threads
21 | """
22 |
23 | def __init__(self):
24 | """Initialize the PDF processor."""
25 | self.executor = ThreadPoolExecutor(max_workers=settings.MAX_WORKERS)
26 | self.vector_store = VectorStore()
27 | self.semaphore = asyncio.Semaphore(settings.MAX_WORKERS)
28 | logger.info(f"Initialized PDFProcessor with {settings.MAX_WORKERS} workers")
29 |
30 | async def process_pdf(self, pdf_path: str) -> List[Dict[str, Any]]:
31 | """
32 | Process a PDF file asynchronously.
33 |
34 | Args:
35 | pdf_path: Path to the PDF file
36 |
37 | Returns:
38 | List[Dict[str, Any]]: List of processed text chunks with metadata
39 | """
40 | try:
41 | async with self.semaphore: # Limit concurrent processing
42 | # Extract text from PDF pages
43 | pages = await self._extract_text_from_pdf(pdf_path)
44 |
45 | # Process chunks in batches
46 | all_chunks = []
47 | chunk_tasks = []
48 |
49 | for page_num, page_text in enumerate(pages, 1):
50 | if not page_text.strip():
51 | continue
52 |
53 | # Create chunk processing task
54 | task = asyncio.create_task(self._process_page(
55 | page_text=page_text,
56 | page_num=page_num,
57 | total_pages=len(pages),
58 | pdf_path=pdf_path
59 | ))
60 | chunk_tasks.append(task)
61 |
62 | # Wait for all chunk processing to complete
63 | chunk_results = await asyncio.gather(*chunk_tasks)
64 | for chunks in chunk_results:
65 | all_chunks.extend(chunks)
66 |
67 | logger.info(f"Successfully processed PDF: {pdf_path} into {len(all_chunks)} chunks")
68 | return all_chunks
69 |
70 | except Exception as e:
71 | logger.error(f"Error processing PDF {pdf_path}: {str(e)}")
72 | raise
73 |
74 | async def _process_page(
75 | self,
76 | page_text: str,
77 | page_num: int,
78 | total_pages: int,
79 | pdf_path: str
80 | ) -> List[Dict[str, Any]]:
81 | """Process a single page of text asynchronously."""
82 | try:
83 | # Run chunking in thread pool to avoid blocking
84 |             loop = asyncio.get_running_loop()
85 | chunks = await loop.run_in_executor(
86 | self.executor,
87 | chunk_text_recursive,
88 | page_text,
89 | settings.CHUNK_SIZE,
90 | settings.CHUNK_OVERLAP,
91 | {
92 | "file_path": pdf_path,
93 | "page_number": page_num,
94 | "total_pages": total_pages
95 | }
96 | )
97 | return chunks
98 |
99 | except Exception as e:
100 | logger.error(f"Error processing page {page_num}: {str(e)}")
101 | raise
102 |
103 | async def _extract_text_from_pdf(self, pdf_path: str) -> List[str]:
104 | """
105 | Extract text from each page of the PDF.
106 |
107 | Args:
108 | pdf_path: Path to the PDF file
109 |
110 | Returns:
111 | List[str]: List of text content from each page
112 | """
113 | def _extract():
114 | try:
115 | reader = PdfReader(pdf_path)
116 | pages = []
117 | for page in reader.pages:
118 | text = page.extract_text()
119 | text = text.strip()
120 | text = '\n'.join(line.strip() for line in text.splitlines() if line.strip())
121 | pages.append(text)
122 |
123 | logger.info(f"Extracted text from {len(pages)} pages in {pdf_path}")
124 | return pages
125 |
126 | except Exception as e:
127 | logger.error(f"Error extracting text from PDF {pdf_path}: {str(e)}")
128 | raise
129 |
130 | # Run extraction in thread pool
131 |         return await asyncio.get_running_loop().run_in_executor(
132 | self.executor, _extract
133 | )
134 |
135 | def get_text_statistics(self, chunks: List[Dict[str, Any]]) -> Dict[str, Any]:
136 | """
137 | Get statistics about the processed text.
138 |
139 | Args:
140 | chunks: List of text chunks with metadata
141 |
142 | Returns:
143 | Dict[str, Any]: Statistics about the text
144 | """
145 | total_chars = sum(len(chunk["text"]) for chunk in chunks)
146 | total_chunks = len(chunks)
147 | avg_chunk_size = total_chars / total_chunks if total_chunks > 0 else 0
148 |
149 | return {
150 | "total_chunks": total_chunks,
151 | "total_characters": total_chars,
152 | "average_chunk_size": avg_chunk_size,
153 | "chunks_per_page": total_chunks / chunks[0]["total_pages"] if chunks else 0
154 | }
--------------------------------------------------------------------------------
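
End-to-end usage of the processor is just two calls, as sketched below from the repository root. Note that constructing PDFProcessor also constructs a VectorStore, so Qdrant must be reachable even though process_pdf itself only extracts and chunks; the file name is a placeholder.

import asyncio

from src.utils.pdf_processor import PDFProcessor

async def main() -> None:
    processor = PDFProcessor()
    # Each chunk dict holds "text" plus file_path/page_number/total_pages metadata.
    chunks = await processor.process_pdf("example.pdf")
    stats = processor.get_text_statistics(chunks)
    print(stats["total_chunks"], round(stats["average_chunk_size"], 1))

asyncio.run(main())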
/src/utils/text_chunking.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict, Any
2 | from langchain.text_splitter import RecursiveCharacterTextSplitter
3 | from loguru import logger
4 |
5 | def chunk_text_recursive(
6 | text: str,
7 | chunk_size: int = 512,
8 | chunk_overlap: int = 50,
9 | metadata: Dict[str, Any] = None
10 | ) -> List[Dict[str, Any]]:
11 | """
12 | Split text into chunks using recursive character text splitter.
13 | This method is more context-aware than simple character splitting.
14 |
15 | Args:
16 | text: Text to split
17 | chunk_size: Maximum size of each chunk
18 | chunk_overlap: Number of characters to overlap between chunks
19 | metadata: Optional metadata to attach to each chunk
20 |
21 | Returns:
22 | List of dictionaries containing chunk text and metadata
23 | """
24 | try:
25 | # Initialize the recursive splitter
26 | splitter = RecursiveCharacterTextSplitter(
27 | chunk_size=chunk_size,
28 | chunk_overlap=chunk_overlap,
29 | length_function=len,
30 |             separators=["\n\n", "\n", " ", ""]
31 | )
32 |
33 | # Split the text
34 | chunks = splitter.split_text(text)
35 |
36 | # Prepare chunk documents with metadata
37 | chunk_docs = []
38 | for i, chunk in enumerate(chunks):
39 | doc = {
40 | "text": chunk,
41 | "chunk_index": i,
42 | "total_chunks": len(chunks)
43 | }
44 | if metadata:
45 | doc.update(metadata)
46 | chunk_docs.append(doc)
47 |
48 | logger.info(f"Split text into {len(chunks)} chunks")
49 | return chunk_docs
50 |
51 | except Exception as e:
52 | logger.error(f"Error chunking text: {str(e)}")
53 | raise
--------------------------------------------------------------------------------
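
The returned shape in miniature, using toy text and sizes:

from src.utils.text_chunking import chunk_text_recursive

text = "First paragraph.\n\nSecond paragraph.\n\n" + "A long sentence. " * 50
chunks = chunk_text_recursive(
    text,
    chunk_size=128,
    chunk_overlap=16,
    metadata={"file_path": "example.pdf", "page_number": 1},
)
for doc in chunks:
    # Each dict carries text, chunk_index, total_chunks, and the metadata keys.
    print(doc["chunk_index"], doc["total_chunks"], len(doc["text"]))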
/src/utils/vector_store.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict, Any
2 | import uuid
3 |
4 | from loguru import logger
5 | from fastembed import TextEmbedding
6 | from qdrant_client import QdrantClient
7 | from fastembed.rerank.cross_encoder import TextCrossEncoder
8 | from qdrant_client.http.models import PointStruct, Distance, VectorParams
9 |
10 | from ..settings import get_settings
11 |
12 | settings = get_settings()
13 |
14 | class VectorStore:
15 | """
16 | Vector store utility using FastEmbed and Qdrant.
17 | Uses nomic-embed-text-v1.5 for high-quality embeddings.
18 | """
19 |
20 | def __init__(self):
21 | """Initialize the vector store with FastEmbed and Qdrant."""
22 |
23 | # Initialize Reranker
24 | self.reranker = TextCrossEncoder(
25 | model_name="Xenova/ms-marco-MiniLM-L-12-v2"
26 | )
27 |
28 |         # Initialize FastEmbed (nomic-embed-text-v1.5 yields 768-dim vectors)
29 |         self.embedding_model = TextEmbedding(
30 |             model_name="nomic-ai/nomic-embed-text-v1.5",
31 |             max_length=settings.EMBEDDING_LENGTH  # Caps input tokens; not the same thing as the collection's vector size
32 |         )
33 |
34 | # Initialize Qdrant client
35 | self.qdrant = QdrantClient(
36 | url=settings.QDRANT_URL,
37 | api_key=settings.QDRANT_API_KEY
38 | )
39 |
40 | # Ensure collection exists
41 | self._init_collection()
42 | logger.info("Initialized VectorStore")
43 |
44 | def _init_collection(self):
45 | """Initialize the vector collection if it doesn't exist."""
46 | try:
47 | collections = self.qdrant.get_collections().collections
48 | if not any(c.name == settings.COLLECTION_NAME for c in collections):
49 | self.qdrant.create_collection(
50 | collection_name=settings.COLLECTION_NAME,
51 | vectors_config=VectorParams(
52 | size=settings.EMBEDDING_LENGTH,
53 | distance=Distance.COSINE
54 | )
55 | )
56 | logger.info(f"Created vector collection: {settings.COLLECTION_NAME}")
57 | except Exception as e:
58 | logger.error(f"Error initializing vector collection: {str(e)}")
59 | raise
60 |
61 | async def add_texts(
62 | self,
63 | texts: List[str],
64 | metadatas: List[Dict[str, Any]] = None
65 | ) -> List[str]:
66 | """
67 | Add texts to the vector store.
68 |
69 | Args:
70 | texts: List of texts to add
71 | metadatas: Optional list of metadata dicts
72 |
73 | Returns:
74 | List of IDs for the added texts
75 | """
76 | if not self.qdrant.collection_exists(collection_name=settings.COLLECTION_NAME):
77 | self._init_collection()
78 | try:
79 | points = []
80 | embeddings = list(self.embedding_model.embed(texts))
81 | ids = [str(uuid.uuid4()) for _ in texts]
82 | for i, (text, embedding) in enumerate(zip(texts, embeddings)):
83 | point = PointStruct(
84 | id=ids[i],
85 | vector=embedding.tolist(),
86 | payload={
87 | "text": text,
88 | **(metadatas[i] if metadatas else {})
89 | }
90 | )
91 | points.append(point)
92 |
93 | # Upload to Qdrant
94 | self.qdrant.upsert(
95 | collection_name=settings.COLLECTION_NAME,
96 | points=points
97 | )
98 |
99 | logger.info(f"Added {len(texts)} texts to vector store")
100 | return ids
101 |
102 | except Exception as e:
103 | logger.error(f"Error adding texts to vector store: {str(e)}")
104 | raise
105 |
106 | async def similarity_search(
107 | self,
108 | query: str,
109 | k: int = 4,
110 | filter: Dict[str, Any] = None
111 | ) -> List[Dict[str, Any]]:
112 | """
113 | Search for similar texts in the vector store.
114 |
115 | Args:
116 | query: Query text
117 | k: Number of results to return
118 | filter: Optional filter for the search
119 |
120 | Returns:
121 | List of similar documents with scores
122 | """
123 | try:
124 | # Generate query embedding
125 | query_embedding = list(self.embedding_model.embed([query]))[0]
126 |
127 | # Search in Qdrant
128 | results = self.qdrant.search(
129 | collection_name=settings.COLLECTION_NAME,
130 | query_vector=query_embedding.tolist(),
131 | limit=k,
132 | query_filter=filter
133 | )
134 |
135 | # Format results
136 | docs = []
137 | for res in results:
138 | doc = {
139 | "id": res.id,
140 | "score": res.score,
141 | **res.payload
142 | }
143 | docs.append(doc)
144 |
145 |             # TODO: re-rank docs with self.reranker before returning (see the sketch after this file)
146 |
147 | return docs
148 |
149 | except Exception as e:
150 | logger.error(f"Error searching vector store: {str(e)}")
151 | raise
--------------------------------------------------------------------------------
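
The reranker is initialized above but never applied; the TODO in similarity_search marks the gap. A minimal sketch of one way to wire it in, assuming fastembed's TextCrossEncoder.rerank(query, documents) yields one relevance score per candidate in input order: over-fetch TOP_K_RERANKER candidates, score them against the query, and keep the best TOP_K.

from typing import Any, Dict, List

def rerank(store, query: str, docs: List[Dict[str, Any]], top_k: int) -> List[Dict[str, Any]]:
    """Re-order similarity_search results with the store's cross-encoder."""
    # One score per candidate text (assumed fastembed behavior).
    scores = list(store.reranker.rerank(query, [d["text"] for d in docs]))
    for doc, score in zip(docs, scores):
        doc["rerank_score"] = score
    return sorted(docs, key=lambda d: d["rerank_score"], reverse=True)[:top_k]

# Usage sketch: over-fetch, then trim.
# docs = await store.similarity_search(query, k=settings.TOP_K_RERANKER)
# best = rerank(store, query, docs, settings.TOP_K)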
/start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e # Exit on error
3 |
4 | # Check if .env file exists
5 | if [ ! -f ".env" ]; then
6 | echo "⚠️ .env file not found! Creating from example..."
7 | if [ -f ".env.example" ]; then
8 | cp .env.example .env
9 | echo "✅ Created .env from .env.example. Please update the values as needed."
10 | else
11 | echo "❌ .env.example not found! Please create a .env file manually."
12 | exit 1
13 | fi
14 | fi
15 |
16 | # Define colors for output
17 | GREEN='\033[0;32m'
18 | YELLOW='\033[1;33m'
19 | BLUE='\033[0;34m'
20 | NC='\033[0m' # No Color
21 |
22 | # Print header
23 | echo -e "${BLUE}╔════════════════════════════════════════════════╗${NC}"
24 | echo -e "${BLUE}║ ${GREEN}RAG System Launcher${BLUE} ║${NC}"
25 | echo -e "${BLUE}╚════════════════════════════════════════════════╝${NC}"
26 |
27 | # Show options
28 | echo -e "${YELLOW}Choose a launch option:${NC}"
29 | echo -e " ${GREEN}1)${NC} Start full system with Docker Compose"
30 | echo -e " ${GREEN}2)${NC} Start FastAPI backend only"
31 | echo -e " ${GREEN}3)${NC} Start Streamlit frontend only"
32 | echo -e " ${GREEN}4)${NC} Quit"
33 |
34 | # Get user input
35 | read -p "Enter your choice [1-4]: " choice
36 |
37 | case $choice in
38 | 1)
39 | echo -e "${YELLOW}Starting both backend and frontend with Docker Compose...${NC}"
40 | docker-compose up --build
41 | ;;
42 | 2)
43 | echo -e "${YELLOW}Starting FastAPI backend service...${NC}"
44 | docker-compose up --build app
45 | ;;
46 | 3)
47 | echo -e "${YELLOW}Starting Streamlit frontend service...${NC}"
48 |
49 | # Check if Docker is preferred
50 | read -p "Use Docker for Streamlit? (y/n): " use_docker
51 |
52 | if [[ $use_docker == "y" || $use_docker == "Y" ]]; then
53 | echo -e "${YELLOW}Starting Streamlit in Docker...${NC}"
54 | docker-compose up --build streamlit
55 | else
56 | echo -e "${YELLOW}Starting Streamlit directly...${NC}"
57 |
58 | # Check for Python virtual environment
59 | if [ -d "venv" ]; then
60 | source venv/bin/activate
61 | elif [ -d ".venv" ]; then
62 | source .venv/bin/activate
63 | else
64 | echo -e "${YELLOW}No Python virtual environment found. Using system Python.${NC}"
65 | fi
66 |
67 | # Check for installed dependencies
68 | if ! pip show streamlit &> /dev/null; then
69 | echo -e "${YELLOW}Streamlit not found. Installing dependencies...${NC}"
70 | pip install -e .
71 | fi
72 |
73 | # Start Streamlit
74 | cd streamlit && ./run.sh
75 | fi
76 | ;;
77 | 4)
78 | echo -e "${YELLOW}Exiting...${NC}"
79 | exit 0
80 | ;;
81 | *)
82 | echo -e "${YELLOW}Invalid choice. Exiting.${NC}"
83 | exit 1
84 | ;;
85 | esac
--------------------------------------------------------------------------------
/streamlit/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11-slim
2 |
3 | WORKDIR /app
4 |
5 | # Set environment variables
6 | ENV PYTHONUNBUFFERED=1 \
7 | PYTHONDONTWRITEBYTECODE=1 \
8 | UV_SYSTEM_PYTHON=1 \
9 | PATH="/root/.local/bin:$PATH" \
10 | STREAMLIT_SERVER_PORT=8501 \
11 | STREAMLIT_SERVER_HEADLESS=true \
12 | STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
13 |
14 | # Install system dependencies
15 | RUN apt-get update && apt-get install -y --no-install-recommends \
16 | build-essential \
17 | curl \
18 | && rm -rf /var/lib/apt/lists/*
19 |
20 | # Install uv
21 | RUN curl -LsSf https://astral.sh/uv/install.sh | sh
22 |
23 | # Copy project files for dependency installation
24 | COPY pyproject.toml .
25 | COPY .env.example .
26 |
27 | # Create required directories
28 | RUN mkdir -p uploads logs && chmod 777 uploads logs
29 |
30 | # Create venv and install dependencies
31 | RUN uv venv && \
32 | . .venv/bin/activate && \
33 | uv sync
34 |
35 | # Copy Streamlit application files
36 | COPY streamlit/ ./streamlit/
37 |
38 | # Copy app.py to the image root as a fallback entry point
39 | RUN cp ./streamlit/app.py /app/ || true
40 |
41 | # Set working directory to the streamlit directory
42 | WORKDIR /app/streamlit
43 |
44 | # Make sure the run script is directly available and executable
45 | COPY streamlit/run.sh /app/streamlit/run.sh
46 | RUN chmod +x /app/streamlit/run.sh
47 |
48 | # Also copy it to the root as a fallback
49 | COPY streamlit/run.sh /run.sh
50 | RUN chmod +x /run.sh
51 |
52 | # Expose Streamlit port
53 | EXPOSE 8501
54 |
55 | # Health check
56 | HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
57 | CMD curl --fail http://localhost:8501/_stcore/health || exit 1
58 |
59 | # Start Streamlit app
60 | ENTRYPOINT ["/run.sh"]
--------------------------------------------------------------------------------
/streamlit/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import httpx
3 | import os
4 | from typing import Dict, List, Any, Optional
5 | from datetime import datetime
6 |
7 | # Configure the app
8 | st.set_page_config(
9 | page_title="RAG Chat System",
10 | page_icon="📚",
11 | layout="wide",
12 | initial_sidebar_state="expanded"
13 | )
14 |
15 | # Constants
16 | API_URL = os.getenv("API_URL", "http://localhost:8000")
17 | API_PREFIX = "/api/v1"
18 |
19 | # State management
20 | if "messages" not in st.session_state:
21 | st.session_state.messages = []
22 |
23 | if "conversation_id" not in st.session_state:
24 | st.session_state.conversation_id = None
25 |
26 |
27 | def get_api_url(endpoint: str) -> str:
28 | """Get the full API URL for an endpoint."""
29 | return f"{API_URL}{API_PREFIX}{endpoint}"
30 |
31 |
32 | def load_conversations() -> List[Dict[str, Any]]:
33 | """Load all conversations from the API."""
34 | try:
35 | response = httpx.get(get_api_url("/chat/conversations"))
36 | if response.status_code == 200:
37 | return response.json()
38 | else:
39 | st.error(f"Failed to load conversations: {response.text}")
40 | return []
41 |     except Exception:  # Backend may be unreachable; fall back to an empty list
42 | return []
43 |
44 |
45 | def create_conversation() -> Optional[str]:
46 | """Create a new conversation and return its ID."""
47 | try:
48 | response = httpx.put(get_api_url("/chat/conversation"))
49 | if response.status_code == 200:
50 | return response.json()["id"]
51 | else:
52 | st.error(f"Failed to create conversation: {response.text}")
53 | return None
54 | except Exception as e:
55 | st.error(f"Error creating conversation: {str(e)}")
56 | return None
57 |
58 |
59 | def get_conversation(conversation_id: str) -> Dict[str, Any]:
60 | """Get a conversation by ID."""
61 | try:
62 | response = httpx.get(get_api_url(f"/chat/conversations/{conversation_id}"))
63 | if response.status_code == 200:
64 | return response.json()
65 | else:
66 | st.error(f"Failed to get conversation: {response.text}")
67 | return {}
68 |     except Exception:  # Treat errors as "conversation unavailable"
69 | return {}
70 |
71 |
72 | def upload_document(file) -> bool:
73 | """Upload a document to the API."""
74 | try:
75 | files = {"file": (file.name, file.getvalue(), "application/pdf")}
76 | response = httpx.post(get_api_url("/documents/upload"), files=files)
77 | if response.status_code == 200:
78 | return True
79 | else:
80 | st.error(f"Failed to upload document: {response.text}")
81 | return False
82 | except Exception as e:
83 | st.error(f"Error uploading document: {str(e)}")
84 | return False
85 |
86 |
87 | def get_task_status(task_id: str) -> Dict[str, Any]:
88 | """Get the status of a document processing task."""
89 | try:
90 | response = httpx.get(get_api_url(f"/documents/task/{task_id}"))
91 | if response.status_code == 200:
92 | return response.json()
93 | else:
94 | return {"status": "failed"}
95 | except Exception:
96 | return {"status": "failed"}
97 |
98 |
99 | def submit_feedback(conversation_id: str, message_index: int, thumbs: str, comment: str | None = None) -> bool:
100 | """Submit feedback for a message."""
101 | try:
102 | response = httpx.post(
103 | get_api_url(f"/chat/{conversation_id}/messages/{message_index}/feedback"),
104 | json={"thumbs": thumbs, "comment": comment},
105 | timeout=10.0
106 | )
107 | return response.status_code == 200
108 | except Exception as e:
109 | st.error(f"Error submitting feedback: {str(e)}")
110 | return False
111 |
112 |
113 | def format_message(msg: Dict[str, Any], message_index: int) -> None:
114 | """Format and display a message in the chat UI."""
115 | role = msg.get("role", "")
116 | content = msg.get("content", "")
117 | feedback = msg.get("feedback", {})
118 |
119 | if role == "user":
120 | st.chat_message("user").write(content)
121 | elif role == "assistant":
122 | with st.chat_message("assistant"):
123 | st.write(content)
124 |
125 | # Only show feedback for assistant messages
126 | if not feedback.get("submitted_at"):
127 | # Use st.feedback for thumbs up/down
128 | selected = st.feedback(
129 | "thumbs",
130 | key=f"feedback_{message_index}"
131 | )
132 |
133 | # Handle feedback submission
134 | if selected is not None:
135 | thumbs = "down" if selected == 0 else "up"
136 |
137 | # For thumbs down, show comment input
138 | comment = None
139 | if selected == 0:
140 | comment = st.text_input(
141 | "What was wrong with this response?",
142 | key=f"feedback_comment_{message_index}"
143 | )
144 | if st.button("Submit Feedback", key=f"submit_feedback_{message_index}"):
145 | if submit_feedback(st.session_state.conversation_id, message_index, thumbs, comment):
146 |                                     st.success("Thank you for your feedback!")
147 | else:
148 | # For thumbs up, submit immediately
149 | if submit_feedback(st.session_state.conversation_id, message_index, thumbs):
150 | st.success("Thank you for your feedback!")
151 | else:
152 | # Show submitted feedback
153 | feedback_icon = "👍" if feedback["thumbs"] == "up" else "👎"
154 | st.caption(f"Feedback: {feedback_icon}")
155 | if feedback.get("comment"):
156 | st.caption(f"Comment: {feedback['comment']}")
157 |
158 | elif role == "system":
159 | st.chat_message("system").write(content)
160 |
161 |
162 | def send_message(conversation_id: str, message: str) -> Optional[Dict[str, Any]]:
163 | """Send a message to the chat API and return the response."""
164 | try:
165 | response = httpx.post(
166 | get_api_url(f"/chat/{conversation_id}"),
167 | json={"message": message},
168 | timeout=60.0 # Increased timeout for long responses
169 | )
170 |
171 | if response.status_code == 200:
172 | return response.json()
173 | else:
174 | st.error(f"Failed to send message: {response.text}")
175 | return None
176 | except Exception as e:
177 | st.error(f"Error sending message: {str(e)}")
178 | return None
179 |
180 |
181 | def load_conversation_history(conversation_id: str) -> None:
182 | """Load conversation history and update the UI."""
183 | conversation = get_conversation(conversation_id)
184 | if conversation and "messages" in conversation:
185 | st.session_state.messages = conversation["messages"]
186 |
187 |
188 | # UI Components
189 | def sidebar():
190 | """Render the sidebar with conversations and document upload."""
191 | st.sidebar.title("RAG Chat System")
192 |
193 | # Document Upload Section
194 | st.sidebar.header("📤 Upload Documents")
195 | uploaded_file = st.sidebar.file_uploader("Upload PDF", type="pdf")
196 |
197 | if uploaded_file and st.sidebar.button("Process Document"):
198 | with st.sidebar.status("Uploading document...") as status:
199 | if upload_document(uploaded_file):
200 | status.update(label="Document uploaded successfully!", state="complete")
201 | st.sidebar.success(f"Document '{uploaded_file.name}' uploaded and being processed.")
202 | else:
203 | status.update(label="Failed to upload document", state="error")
204 |
205 | # Conversation Management
206 | st.sidebar.header("💬 Conversations")
207 |
208 | if st.sidebar.button("New Conversation"):
209 | with st.spinner("Creating new conversation..."):
210 | # Create a new conversation
211 | conversation_id = create_conversation()
212 | if conversation_id:
213 | st.session_state.conversation_id = conversation_id
214 | st.session_state.messages = []
215 | st.sidebar.success("New conversation created!")
216 | st.rerun()
217 | else:
218 | st.sidebar.error("Failed to create new conversation.")
219 |
220 | # List existing conversations
221 | conversations = load_conversations()
222 | if conversations:
223 | st.sidebar.subheader("Select Conversation")
224 | for conv in conversations:
225 | conv_id = conv.get("id", "")
226 | title = conv.get("title", "Untitled")
227 | created_at = conv.get("created_at", "")
228 |
229 | # Format the date if it exists
230 | if created_at:
231 | try:
232 | # Parse ISO format or timestamp
233 | if isinstance(created_at, str):
234 | created_date = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
235 | else:
236 | created_date = datetime.fromtimestamp(created_at)
237 |
238 | date_str = created_date.strftime("%Y-%m-%d %H:%M")
239 |                     except Exception:
240 | date_str = "Unknown date"
241 | else:
242 | date_str = "Unknown date"
243 |
244 | # Create a button for each conversation
245 | if st.sidebar.button(f"{title} ({date_str})", key=f"conv_{conv_id}"):
246 | st.session_state.conversation_id = conv_id
247 | load_conversation_history(conv_id)
248 | st.rerun()
249 |
250 | # About section
251 | st.sidebar.header("ℹ️ About")
252 | st.sidebar.info(
253 | """
254 | This is a RAG (Retrieval-Augmented Generation) chat system.
255 | Upload documents and ask questions about them.
256 |
257 | The system will retrieve relevant information from your documents
258 | to provide accurate and contextual responses.
259 | """
260 | )
261 |
262 |
263 | def main_content():
264 | """Render the main chat interface."""
265 | st.title("RAG Chat System")
266 |
267 | # Check if we have an active conversation
268 | if st.session_state.conversation_id is None:
269 | st.info("👈 Create a new conversation or select an existing one from the sidebar.")
270 | return
271 |
272 | # Display conversation title
273 | conversation = get_conversation(st.session_state.conversation_id)
274 | if conversation:
275 | st.subheader(f"Conversation: {conversation.get('title', 'Untitled')}")
276 |
277 | # Display chat messages
278 | for message_index, message in enumerate(st.session_state.messages):
279 | format_message(message, message_index)
280 |
281 | # Chat input
282 | if prompt := st.chat_input("Ask a question about your documents..."):
283 | # Add user message to UI
284 | st.chat_message("user").write(prompt)
285 |
286 | # Add to session state
287 | user_message = {
288 | "role": "user",
289 | "content": prompt
290 | }
291 | st.session_state.messages.append(user_message)
292 |
293 | # Send message and get response
294 | with st.spinner("Thinking..."):
295 | response = send_message(st.session_state.conversation_id, prompt)
296 |
297 | if response:
298 | # Add assistant message to session state and display it
299 | assistant_message = {
300 | "role": response["role"],
301 | "content": response["content"]
302 | }
303 | st.session_state.messages.append(assistant_message)
304 | format_message(assistant_message, len(st.session_state.messages) - 1)
305 | else:
306 | st.error("Failed to get response. Please try again.")
307 |
308 |
309 | # Main app layout
310 | def main():
311 | sidebar()
312 | main_content()
313 |
314 |
315 | if __name__ == "__main__":
316 | main()
--------------------------------------------------------------------------------
/streamlit/config.toml:
--------------------------------------------------------------------------------
1 | [theme]
2 | primaryColor = "#4682B4" # Steel Blue
3 | backgroundColor = "#F0F2F6"
4 | secondaryBackgroundColor = "#E0E4E8"
5 | textColor = "#262730"
6 | font = "sans serif"
7 |
8 | [server]
9 | port = 8501
10 | maxUploadSize = 200
11 | enableCORS = true
12 | enableXsrfProtection = true
13 | headless = true
14 |
15 | [browser]
16 | gatherUsageStats = false
17 |
18 | [runner]
19 | magicEnabled = true
20 | installTracer = false
21 | fixMatplotlib = true
22 |
23 | [logger]
24 | level = "info"
--------------------------------------------------------------------------------
/streamlit/run-docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e # Exit on error
3 |
4 | # Define colors for output
5 | GREEN='\033[0;32m'
6 | YELLOW='\033[1;33m'
7 | BLUE='\033[0;34m'
8 | NC='\033[0m' # No Color
9 |
10 | # Print header
11 | echo -e "${BLUE}╔════════════════════════════════════════════════╗${NC}"
12 | echo -e "${BLUE}║ ${GREEN}Streamlit Docker Runner${BLUE} ║${NC}"
13 | echo -e "${BLUE}╚════════════════════════════════════════════════╝${NC}"
14 |
15 | # Check if Docker is installed
16 | if ! command -v docker &> /dev/null; then
17 | echo -e "${YELLOW}Docker is not installed. Please install Docker to continue.${NC}"
18 | exit 1
19 | fi
20 |
21 | # Check if docker-compose.yml exists
22 | if [ ! -f "docker-compose.yml" ]; then
23 | echo -e "${YELLOW}docker-compose.yml not found. Make sure you're in the streamlit directory.${NC}"
24 | exit 1
25 | fi
26 |
27 | echo -e "${YELLOW}Building and starting Streamlit container...${NC}"
28 | echo -e "${YELLOW}This will connect to a FastAPI backend running on your host machine.${NC}"
29 | echo -e "${YELLOW}Make sure the FastAPI backend is running on port 8000.${NC}"
30 |
31 | # Start with docker-compose
32 | docker-compose build
33 | docker-compose up
34 |
35 | # Script never reaches here if docker-compose up is running in foreground
--------------------------------------------------------------------------------
/streamlit/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e # Exit on error
3 |
4 | # Activate virtual environment if it exists
5 | if [ -d "/app/.venv" ]; then
6 | echo "Activating virtual environment..."
7 | . /app/.venv/bin/activate
8 | fi
9 |
10 | # Load environment variables from parent .env file if exists
11 | if [ -f "../.env" ]; then
12 | echo "Loading environment variables from ../.env"
13 | export $(grep -v '^#' ../.env | xargs)
14 | fi
15 |
16 | # Set default values
17 | export API_URL=${API_URL:-"http://localhost:8000"}
18 | export WS_URL=${WS_URL:-"ws://localhost:8000"}
19 | export STREAMLIT_SERVER_PORT=${STREAMLIT_SERVER_PORT:-8501}
20 | export STREAMLIT_SERVER_HEADLESS=${STREAMLIT_SERVER_HEADLESS:-true}
21 | export STREAMLIT_BROWSER_GATHER_USAGE_STATS=${STREAMLIT_BROWSER_GATHER_USAGE_STATS:-false}
22 |
23 | # Print environment settings
24 | echo "Starting Streamlit app with:"
25 | echo " - API_URL: $API_URL"
26 | echo " - WS_URL: $WS_URL"
27 | echo " - STREAMLIT_SERVER_PORT: $STREAMLIT_SERVER_PORT"
28 | echo " - Python executable: $(which python)"
29 |
30 | # Check app files and run the appropriate one
31 | if [ -f "app.py" ]; then
32 | echo "Using app.py for Streamlit"
33 | exec streamlit run app.py
34 | else
35 | echo "ERROR: No Streamlit app found in $(pwd)!"
36 | echo "Directory contents:"
37 | ls -la
38 | exit 1
39 | fi
--------------------------------------------------------------------------------
/streamlit/utils.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | import os
3 | import time
4 | import streamlit as st
5 | from typing import Dict, List, Any, Optional, Tuple
6 | from datetime import datetime
7 |
8 | # Constants
9 | API_URL = os.getenv("API_URL", "http://localhost:8000")
10 | API_PREFIX = "/api/v1"
11 |
12 | def get_api_url(endpoint: str) -> str:
13 | """Get the full API URL for an endpoint."""
14 | return f"{API_URL}{API_PREFIX}{endpoint}"
15 |
16 | # Document Management
17 | def upload_document(file) -> Tuple[bool, Optional[str]]:
18 | """
19 | Upload a document to the API.
20 |
21 | Args:
22 | file: The uploaded file object from Streamlit
23 |
24 | Returns:
25 | Tuple of (success, task_id or None)
26 | """
27 | try:
28 | files = {"file": (file.name, file.getvalue(), "application/pdf")}
29 | response = httpx.post(get_api_url("/documents/upload"), files=files)
30 |
31 | if response.status_code == 200:
32 | return True, response.json().get("task_id")
33 | else:
34 | st.error(f"Failed to upload document: {response.text}")
35 | return False, None
36 | except Exception as e:
37 | st.error(f"Error uploading document: {str(e)}")
38 | return False, None
39 |
40 | def poll_task_status(task_id: str, max_attempts: int = 60, interval: float = 2.0) -> Dict[str, Any]:
41 | """
42 | Poll the task status until it completes or fails.
43 |
44 | Args:
45 | task_id: The ID of the task to poll
46 | max_attempts: Maximum number of polling attempts
47 | interval: Time interval between polls in seconds
48 |
49 | Returns:
50 | Task status information
51 | """
52 |     for _ in range(max_attempts):
53 | try:
54 | response = httpx.get(get_api_url(f"/documents/task/{task_id}"))
55 | if response.status_code == 200:
56 | task_data = response.json()
57 | status = task_data.get("status", "")
58 |
59 | if status in ["completed", "failed"]:
60 | return task_data
61 |
62 | # Add a small delay before the next poll
63 | time.sleep(interval)
64 | else:
65 | return {"status": "failed", "error": f"Failed to get task status: {response.text}"}
66 | except Exception as e:
67 | return {"status": "failed", "error": f"Error polling task: {str(e)}"}
68 |
69 | return {"status": "timeout", "error": "Task polling timed out"}
70 |
71 | # Conversation Management
72 | def create_conversation() -> Optional[str]:
73 | """Create a new conversation and return its ID."""
74 | try:
75 | response = httpx.put(get_api_url("/chat/conversation"))
76 | if response.status_code == 200:
77 | return response.json()["id"]
78 | else:
79 | st.error(f"Failed to create conversation: {response.text}")
80 | return None
81 | except Exception as e:
82 | st.error(f"Error creating conversation: {str(e)}")
83 | return None
84 |
85 | def get_conversations(skip: int = 0, limit: int = 20) -> List[Dict[str, Any]]:
86 | """Get a list of conversations with pagination."""
87 | try:
88 | response = httpx.get(get_api_url(f"/chat/conversations?skip={skip}&limit={limit}"))
89 | if response.status_code == 200:
90 | return response.json()
91 | else:
92 | st.error(f"Failed to get conversations: {response.text}")
93 | return []
94 | except Exception as e:
95 | st.error(f"Error getting conversations: {str(e)}")
96 | return []
97 |
98 | def get_conversation(conversation_id: str) -> Optional[Dict[str, Any]]:
99 | """Get a specific conversation by ID."""
100 | try:
101 | response = httpx.get(get_api_url(f"/chat/conversations/{conversation_id}"))
102 | if response.status_code == 200:
103 | return response.json()
104 | else:
105 | st.error(f"Failed to get conversation: {response.text}")
106 | return None
107 | except Exception as e:
108 | st.error(f"Error getting conversation: {str(e)}")
109 | return None
110 |
111 | def delete_conversation(conversation_id: str) -> bool:
112 | """Delete a conversation by ID."""
113 | try:
114 | response = httpx.delete(get_api_url(f"/chat/conversations/{conversation_id}"))
115 | if response.status_code == 200:
116 | return True
117 | else:
118 | st.error(f"Failed to delete conversation: {response.text}")
119 | return False
120 | except Exception as e:
121 | st.error(f"Error deleting conversation: {str(e)}")
122 | return False
123 |
124 | # Utility Functions
125 | def format_timestamp(timestamp_str: str) -> str:
126 | """Format an ISO timestamp to a human-readable format."""
127 | try:
128 | if not timestamp_str:
129 | return "Unknown"
130 |
131 | # Handle both string ISO format and numeric timestamp
132 | if isinstance(timestamp_str, str):
133 | dt = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
134 | else:
135 | dt = datetime.fromtimestamp(timestamp_str)
136 |
137 | return dt.strftime("%Y-%m-%d %H:%M")
138 | except Exception:
139 | return "Invalid date"
140 |
141 | def format_file_size(size_bytes: int) -> str:
142 | """Format file size in bytes to human-readable format."""
143 | if size_bytes < 1024:
144 | return f"{size_bytes} bytes"
145 | elif size_bytes < 1024 * 1024:
146 | return f"{size_bytes / 1024:.1f} KB"
147 | elif size_bytes < 1024 * 1024 * 1024:
148 | return f"{size_bytes / (1024 * 1024):.1f} MB"
149 | else:
150 | return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
--------------------------------------------------------------------------------
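
The formatting helpers at the bottom are pure functions and easy to sanity-check; run this from the streamlit/ directory so the local utils module resolves ahead of the streamlit package.

from utils import format_file_size, format_timestamp

print(format_file_size(512))                      # "512 bytes"
print(format_file_size(3 * 1024 * 1024))          # "3.0 MB"
print(format_timestamp("2024-01-01T12:30:00Z"))   # "2024-01-01 12:30"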