├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── app.py
├── app
├── __init__.py
├── prompts.py
├── routes.py
├── static
│ ├── css
│ │ └── style.css
│ ├── favicon.ico
│ ├── images
│ │ ├── Home_Example_1.png
│ │ ├── Home_Example_2.png
│ │ ├── Home_Example_3.png
│ │ ├── PDF_Management_Eaxmple.png
│ │ ├── full_logo.png
│ │ └── logo.png
│ └── js
│ │ ├── home.js
│ │ └── script.js
└── templates
│ ├── index.html
│ └── pdfManagement.html
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore system files
2 | .DS_Store
3 | static/.DS_Store
4 | static/js/.DS_Store
5 |
6 | # Ignore virtual environment directories and files
7 | venv/
8 | bin/
9 | share/
10 | pyvenv.cfg
11 |
12 | # Ignore database directory contents, but keep the directory
13 | data/db/*
14 | !data/db/.gitkeep
15 |
16 | # Ignore PDF directory contents, but keep the directory
17 | data/pdf/*
18 | !data/pdf/.gitkeep
19 |
20 | # Ignore results directory
21 | results/
22 |
23 | # Byte-compiled / optimized / DLL files
24 | __pycache__/
25 | *.py[cod]
26 | *$py.class
27 |
28 | # Distribution / packaging
29 | .Python
30 | build/
31 | develop-eggs/
32 | dist/
33 | downloads/
34 | eggs/
35 | .eggs/
36 | lib/
37 | lib64/
38 | parts/
39 | sdist/
40 | var/
41 | wheels/
42 | *.egg-info/
43 | .installed.cfg
44 | *.egg
45 |
46 | # PyInstaller
47 | # Usually these files are written by a python script from a template
48 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
49 | *.manifest
50 | *.spec
51 |
52 | # Installer logs
53 | pip-log.txt
54 | pip-delete-this-directory.txt
55 |
56 | # Unit test / coverage reports
57 | htmlcov/
58 | .tox/
59 | .nox/
60 | .coverage
61 | .coverage.*
62 | .cache
63 | nosetests.xml
64 | coverage.xml
65 | *.cover
66 | *.py,cover
67 | .hypothesis/
68 | .pytest_cache/
69 |
70 | # Translations
71 | *.mo
72 | *.pot
73 |
74 | # Django stuff:
75 | *.log
76 | local_settings.py
77 |
78 | # Flask stuff:
79 | instance/
80 | .webassets-cache
81 |
82 | # Scrapy stuff:
83 | .scrapy
84 |
85 | # Sphinx documentation
86 | docs/_build/
87 |
88 | # PyBuilder
89 | target/
90 |
91 | # Jupyter Notebook
92 | .ipynb_checkpoints
93 |
94 | # IPython
95 | profile_default/
96 | ipython_config.py
97 |
98 | # PyCharm
99 | .idea/
100 |
101 | # VS Code
102 | .vscode/
103 |
104 | # mypy
105 | .mypy_cache/
106 | .dmypy.json
107 | dmypy.json
108 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 |
2 | # Contributor Covenant Code of Conduct
3 |
4 | ## Our Pledge
5 |
6 | We as members, contributors, and leaders pledge to make participation in our
7 | community a harassment-free experience for everyone, regardless of age, body
8 | size, visible or invisible disability, ethnicity, sex characteristics, gender
9 | identity and expression, level of experience, education, socio-economic status,
10 | nationality, personal appearance, race, caste, color, religion, or sexual
11 | identity and orientation.
12 |
13 | We pledge to act and interact in ways that contribute to an open, welcoming,
14 | diverse, inclusive, and healthy community.
15 |
16 | ## Our Standards
17 |
18 | Examples of behavior that contributes to a positive environment for our
19 | community include:
20 |
21 | * Demonstrating empathy and kindness toward other people
22 | * Being respectful of differing opinions, viewpoints, and experiences
23 | * Giving and gracefully accepting constructive feedback
24 | * Accepting responsibility and apologizing to those affected by our mistakes,
25 | and learning from the experience
26 | * Focusing on what is best not just for us as individuals, but for the overall
27 | community
28 |
29 | Examples of unacceptable behavior include:
30 |
31 | * The use of sexualized language or imagery, and sexual attention or advances of
32 | any kind
33 | * Trolling, insulting or derogatory comments, and personal or political attacks
34 | * Public or private harassment
35 | * Publishing others' private information, such as a physical or email address,
36 | without their explicit permission
37 | * Other conduct which could reasonably be considered inappropriate in a
38 | professional setting
39 |
40 | ## Enforcement Responsibilities
41 |
42 | Community leaders are responsible for clarifying and enforcing our standards of
43 | acceptable behavior and will take appropriate and fair corrective action in
44 | response to any behavior that they deem inappropriate, threatening, offensive,
45 | or harmful.
46 |
47 | Community leaders have the right and responsibility to remove, edit, or reject
48 | comments, commits, code, wiki edits, issues, and other contributions that are
49 | not aligned to this Code of Conduct, and will communicate reasons for moderation
50 | decisions when appropriate.
51 |
52 | ## Scope
53 |
54 | This Code of Conduct applies within all community spaces, and also applies when
55 | an individual is officially representing the community in public spaces.
56 | Examples of representing our community include using an official email address,
57 | posting via an official social media account, or acting as an appointed
58 | representative at an online or offline event.
59 |
60 | ## Enforcement
61 |
62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
63 | reported to the community leaders responsible for enforcement at
64 | the project's [issue tracker](https://github.com/AbdArdati/PDFQueryAI/issues).
65 | All complaints will be reviewed and investigated promptly and fairly.
66 |
67 | All community leaders are obligated to respect the privacy and security of the
68 | reporter of any incident.
69 |
70 | ## Enforcement Guidelines
71 |
72 | Community leaders will follow these Community Impact Guidelines in determining
73 | the consequences for any action they deem in violation of this Code of Conduct:
74 |
75 | ### 1. Correction
76 |
77 | **Community Impact**: Use of inappropriate language or other behavior deemed
78 | unprofessional or unwelcome in the community.
79 |
80 | **Consequence**: A private, written warning from community leaders, providing
81 | clarity around the nature of the violation and an explanation of why the
82 | behavior was inappropriate. A public apology may be requested.
83 |
84 | ### 2. Warning
85 |
86 | **Community Impact**: A violation through a single incident or series of
87 | actions.
88 |
89 | **Consequence**: A warning with consequences for continued behavior. No
90 | interaction with the people involved, including unsolicited interaction with
91 | those enforcing the Code of Conduct, for a specified period of time. This
92 | includes avoiding interactions in community spaces as well as external channels
93 | like social media. Violating these terms may lead to a temporary or permanent
94 | ban.
95 |
96 | ### 3. Temporary Ban
97 |
98 | **Community Impact**: A serious violation of community standards, including
99 | sustained inappropriate behavior.
100 |
101 | **Consequence**: A temporary ban from any sort of interaction or public
102 | communication with the community for a specified period of time. No public or
103 | private interaction with the people involved, including unsolicited interaction
104 | with those enforcing the Code of Conduct, is allowed during this period.
105 | Violating these terms may lead to a permanent ban.
106 |
107 | ### 4. Permanent Ban
108 |
109 | **Community Impact**: Demonstrating a pattern of violation of community
110 | standards, including sustained inappropriate behavior, harassment of an
111 | individual, or aggression toward or disparagement of classes of individuals.
112 |
113 | **Consequence**: A permanent ban from any sort of public interaction within the
114 | community.
115 |
116 | ## Attribution
117 |
118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
119 | version 2.1, available at
120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
121 |
122 | Community Impact Guidelines were inspired by
123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
124 |
125 | For answers to common questions about this code of conduct, see the FAQ at
126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
127 | [https://www.contributor-covenant.org/translations][translations].
128 |
129 | [homepage]: https://www.contributor-covenant.org
130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
131 | [Mozilla CoC]: https://github.com/mozilla/diversity
132 | [FAQ]: https://www.contributor-covenant.org/faq
133 | [translations]: https://www.contributor-covenant.org/translations
134 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # AskItRight: AI-Powered PDF Query Application 🚀
6 |
7 | This repository contains a Flask web application that allows users to upload PDF documents, query their contents, and retrieve answers using an AI language model. The application integrates several functionalities to manage PDFs, handle user queries, and maintain usage statistics.
8 |
9 | ## High-Level Overview 🌟
10 |
11 | The application provides an interface for:
12 | - 📄 **Uploading and managing PDF documents**.
13 | - ❓ **Submitting queries to retrieve information from uploaded PDFs**.
14 | - 📊 **Tracking PDF usage statistics**.
15 | - 🔧 **Performing administrative operations like clearing data and deleting files**.
16 | - 🔍 **Viewing PDF files**
17 | - 📝 **Prompt templates management**
18 |
19 | ### Key Features 🔑
20 |
21 | 1. **PDF Management**:
22 | - **Upload PDFs**: 📤 Users can upload PDF files through the upload interface. These files are processed and stored in the system.
23 | - **List PDFs**: 📋 Users can view a list of all uploaded PDF files through the available PDFs interface.
24 | - **Delete PDFs**: 🗑️ Users can remove specific PDF files using the delete functionality available in the PDF management interface.
25 | - **View PDFs**: 👁️ Users can open and view the content of PDF files in a new browser tab directly from the list of PDFs.
26 |
27 | 2. **Query Handling**:
28 | - **Ask Questions to PDF**: 🤔 Users can submit questions about the content of uploaded PDFs using the query interface. The application uses the AI model to provide answers based on the PDF contents.
29 | - **AI Integration**: 🤖 The Ollama3.1 model is used to generate answers to queries from the content of the PDFs. This functionality is accessible through the AI query interface.
30 | - **Prompt Templates**: 📝 Users can view and select from various prompt templates to guide the AI's responses, ensuring they are tailored to specific needs. (Currently in progress, with frontend Create, Update, and Delete to be implemented.)
31 |
32 | 3. **Statistics and Administration**:
33 | - **Clear Chat History**: 🧹 Users can clear previous chat interactions using the clear chat history button in the query section.
34 | - **Clear Database**: 🚮 Deletes all stored PDFs and related data, effectively resetting the application’s state. This action is available in the database management section.
35 | - **PDF Usage Statistics**: 📈 Provides information on how frequently each PDF has been queried, viewable through the statistics dashboard.
36 |
37 | ### HTML Interfaces Overview 🖥️
38 |
39 | 1. **Document Interaction Dashboard**:
40 | - **Homepage**: 🏠 Features interfaces for asking questions about PDF content and interacting with the Ollama3.1 AI model. It also displays query and PDF usage statistics.
41 | - **PDF Query Section**: ❓ Allows users to submit questions about PDFs and view responses.
42 | - **AI Query Section**: 🤖 Provides functionality to query the Ollama3.1 AI model independently of PDFs.
43 | - **Statistics Section**: 📊 Displays usage statistics for both queries and PDFs.
44 |
45 | 2. **PDF Management and Statistics**:
46 | - **Upload and List PDFs**: 📤📋 An interface to upload new PDFs, view a list of all uploaded PDFs, and access each PDF file.
47 | - **Database Management**: 🗑️ Provides options to clear the database and manage stored PDFs.
48 | - **Statistics Dashboard**: 📈 Shows statistics related to the total number of PDFs and documents in the vector store.
49 |
50 | ### Examples
51 |
52 | | Image | Description |
53 | |-------|-------------|
54 | | | This screenshot shows the functionality of using PDFs with the 'Essay Expert' prompt template. At the top, the system leverages PDF content for detailed responses, while the lower section illustrates responses generated without PDFs. |
55 | | This example demonstrates the advanced capabilities of the 'Essay Expert' prompt template. The screenshot highlights how the system utilises PDF content to generate comprehensive responses at the top, while the lower section shows the output generated without PDFs, illustrating the impact of including detailed content. |
56 | | | This screenshot reveals the limitations of the current app, indicating that it may struggle with queries beyond the scope of the provided documents. It underscores the need for further improvements and extensive testing to enhance the model's accuracy and robustness. |
57 | | | This screenshot demonstrates the PDF Management & Statistics Dashboard, showcasing how users can view detailed statistics related to the uploaded PDFs and documents within the system. |
58 |
59 | ## Installation Instructions ⚙️
60 |
61 | ### System Requirements 🖥️
62 |
63 | For Ollama3.1 8B, ensure your system has at least 16 GB of RAM and sufficient disk space for reasonable performance; a GPU is recommended for models with 70B parameters or higher.
64 |
65 | ### 1. Download and Install Ollama
66 |
67 | To use the `Ollama` model, follow these steps to download and install it based on your operating system:
68 |
69 | 1. **Visit the Ollama Download Page:**
70 | Go to [Ollama Download Page](https://ollama.com/download).
71 |
72 | 2. **Download the Installer:**
73 | Choose the appropriate installer for your operating system and download it.
74 |
75 | 3. **Install Ollama:**
76 | Follow the installation instructions provided on the download page for your specific operating system.
77 |
78 | 4. **Verify Installation:**
79 | After installation, verify that Ollama is installed correctly by running the following command in your terminal or command prompt:
80 |
81 | ```bash
82 | ollama --version
83 | ```
84 | 5. **Run the Llama3.1 Model:**
85 | Once Ollama is installed, start the llama3.1 model by running the following command in your terminal or command prompt:
86 |
87 | ```bash
88 | ollama run llama3.1
89 | ```
90 | When you run `ollama run llama3.1`, it defaults to the 8B configuration unless you specify otherwise.
91 |
92 | ### 2. **Clone the Repository**:
93 |
94 | git clone https://github.com/AbdArdati/PDFQueryAI.git
95 | cd PDFQueryAI
96 |
97 | ### 3. **Create a Virtual Environment**:
98 |
99 | python3 -m venv venv
100 | source venv/bin/activate # On Windows use `venv\Scripts\activate`
101 |
102 | ### 4. **Install Dependencies**:
103 |
104 | pip install -r requirements.txt
105 |
106 | ### 5. **Run the Application**:
107 |
108 | python app.py
109 |
110 | ### Endpoints and Their Functions
111 |
112 | - **`/`**: Serves the main HTML page for user interaction.
113 |
114 | - **`/ai`**:
115 | - **Method**: `POST`
116 | - **Function**: Accepts a JSON request containing a query, passes it to the AI model, and returns the generated response.
117 |
118 | - **`/ask_pdf`**:
119 | - **Method**: `POST`
120 | - **Function**: Processes queries against uploaded PDFs. It uses a vector store to find relevant documents, generates an answer based on the context, and returns the answer along with sources and usage statistics.
121 |
122 | - **`/clear_chat_history`**:
123 | - **Method**: `POST`
124 | - **Function**: Clears the chat history stored in the global `chat_history` list.
125 |
126 | - **`/clear_db`**:
127 | - **Method**: `POST`
128 | - **Function**: Deletes all stored vector data and PDF files from the filesystem, effectively resetting the application’s state.
129 |
130 | - **`/list_pdfs`**:
131 | - **Method**: `GET`
132 | - **Function**: Lists all PDF files currently available in the storage directory.
133 |
134 | - **`/pdf`**:
135 | - **Method**: `POST`
136 | - **Function**: Handles PDF uploads. The file is saved, processed, and split into chunks. These chunks are then stored in a vector database for later querying.
137 |
138 | - **`/list_documents`**:
139 | - **Method**: `GET`
140 | - **Function**: Lists all documents in the vector store.
141 |
142 | - **`/delete_pdf`**:
143 | - **Method**: `POST`
144 | - **Function**: Deletes a specific PDF file and its associated documents from the vector store.
145 |
146 | - **`/delete_document`**:
147 | - **Method**: `POST`
148 | - **Function**: Deletes a specific document from the vector store by its ID.
149 |
150 | - **`/pdf_usage`**:
151 | - **Method**: `GET`
152 | - **Function**: Provides usage statistics on how often each PDF has been queried.
153 |
154 | ### Internal Functions 🔧
155 |
156 | - **`file_exists(filename)`**: Checks if a file with the given name exists in the directory.
157 |
158 | - **`compute_file_hash(file)`**: Computes the MD5 hash of a file to detect duplicates.
159 |
160 | - **`perform_ocr(pdf_path)`**: Placeholder function for performing Optical Character Recognition (OCR) on a PDF if it is not structured. This needs to be implemented with an actual OCR library like `pytesseract`.
161 |
162 | ### Error Handling ⚠️
163 |
164 | - The application includes basic error handling for missing data, file operations, and vector store interactions. For example, if a file is not found or an operation fails, the application returns an appropriate error message and HTTP status code.
165 |
166 | ### Release Information 🚀
167 |
168 | **Current Version:** `v1.0.0-beta`
169 |
170 | **Release Date:** `25/07/2024`
171 |
172 | **Description:** This is the beta release of AskItRight. It includes features for uploading, managing, and querying PDF documents using an AI model, as well as basic statistics and administrative functionalities.
173 |
174 | **Changelog:**
175 | - Initial beta release with core functionalities.
176 | - Added PDF upload, query, and management features.
177 | - Integrated AI model for querying PDF content.
178 | - Implemented basic statistics and database management features.
179 |
180 | ### Summary 📝
181 |
182 | Overall, the application provides a structured way to manage and interact with PDF documents using an AI model. It integrates file management, data processing, and querying capabilities into a Flask web service, allowing users to upload, query, and manage PDFs while also keeping track of usage statistics and providing administrative functionalities.
183 |
184 |
185 | ### Disclaimer 📝
186 | Please be aware that this application is provided "as is," without any guarantee of functionality or reliability. It's important to note that the codebase may not adhere to best practices in terms of tidiness and organisation, and there is significant room for improvement and bug fixes. I plan to release an improved version in the future as time permits.
187 |
188 | The developers disclaim all responsibility for bugs or issues; use of the application is at your own risk. Users are encouraged to contribute, but the developers are not liable for any damages arising from use of the application.
189 |
190 | ## License 🛡️
191 |
192 | This project is licensed under the Apache 2.0 License. The core components of this application follow the work from [https://github.com/ThomasJay/RAG](https://github.com/ThomasJay/RAG) which is also licensed under Apache 2.0.
193 |
194 | ```
195 | Licensed under the Apache License, Version 2.0 (the "License");
196 | you may not use this file except in compliance with the License.
197 | You may obtain a copy of the License at
198 |
199 | http://www.apache.org/licenses/LICENSE-2.0
200 |
201 | Unless required by applicable law or agreed to in writing, software
202 | distributed under the License is distributed on an "AS IS" BASIS,
203 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
204 | See the License for the specific language governing permissions and
205 | limitations under the License.
206 | ```
207 |
208 | ## Code of Conduct
209 |
210 | We are committed to fostering a welcoming and inclusive community. Please read our [Code of Conduct](CODE_OF_CONDUCT.md) to understand our guidelines and expectations.
211 |
212 | ---
213 | **Developed by Abd Alsattar Ardati for the sake of exploring, learning, and sharing.**
214 | Visit my [website](https://abd.ardati.org) for more information or contact me at abd.alsattar.ardati @ gmail.
215 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | As this project is primarily for personal experimentation and not intended for production deployment, I do not provide security updates or support for any versions. The project is in a developmental stage, and no security considerations have been incorporated. Use it at your own risk, as mentioned in the README.md.
6 |
7 | | Version | Supported |
8 | | ------- | ------------------ |
9 | | All | :x: |
10 |
11 | ## Reporting a Vulnerability
12 |
13 | Since this project is for personal use and experimentation only, I am not actively managing or addressing security vulnerabilities. However, if you encounter any issues or potential vulnerabilities, you may report them through the project's [issue tracker](https://github.com/AbdArdati/PDFQueryAI/issues).
14 |
15 | Please be aware that due to the experimental nature of this project, there are no guarantees of updates or fixes for reported issues. Reports are reviewed on a case-by-case basis, and while I appreciate feedback, there is no commitment to address or resolve security concerns promptly.
16 |
17 | Thank you for understanding.
18 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
from app import create_app

if __name__ == "__main__":
    app = create_app()
    # app.run() blocks until the server shuts down, so anything after it
    # never executes. The original printed the URL *after* run(); log it first.
    print("Flask app is running on http://127.0.0.1:5000")
    app.run(debug=True)  # Debug mode: development only — never enable in production
--------------------------------------------------------------------------------
/app/__init__.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
# Package version string; bump on release.
__version__ = '1.0.0-beta'
3 |
def create_app():
    """Application factory: build and configure the Flask app.

    Returns:
        Flask: the application with all blueprints registered.
    """
    application = Flask(__name__, template_folder='templates')

    # Import lazily to avoid a circular import at package load time.
    from .routes import bp
    application.register_blueprint(bp)

    # Further configuration (config objects, extensions) would go here.
    return application
14 |
--------------------------------------------------------------------------------
/app/prompts.py:
--------------------------------------------------------------------------------
1 | from langchain.prompts import PromptTemplate
2 |
3 | # Define all prompts
# Registry of named prompt templates, selected per request via the
# "promptType" field of POST /ask_pdf. Every template receives two
# variables at render time:
#   {input}   - the user's query
#   {context} - text retrieved from the uploaded PDFs
# The [INST]...[/INST] markers follow the Llama-style instruction format.
PROMPTS = {
    # General-purpose assistant: deep, structured Markdown answers.
    "General AI Assistant": PromptTemplate.from_template(
        """
[INST] You are an exceptionally advanced AI assistant, equipped with state-of-the-art capabilities to understand and analyze technical documents. Your role is to deliver responses that are not only accurate and insightful but also enriched with a deep understanding of the context provided by the PDFs.

**Instructions:**
- Thoroughly analyze the provided context and input.
- Extract and synthesize key information from the PDFs to provide a comprehensive and informed response.
- Enhance your responses with detailed explanations, advanced insights, and contextually relevant examples.
- Present information in a structured format using Markdown where applicable, but prioritize clarity and depth of content over formatting.
- Address the query with a high level of detail and sophistication, demonstrating a deep understanding of the subject matter.
- If any critical information is missing or if further context is needed, clearly indicate this in your response.

**Response Guidelines:**
- **Introduction:** Begin with a brief overview of the topic, setting the stage for a detailed analysis.
- **Detailed Analysis:** Provide an in-depth examination of the topic, incorporating insights derived from the PDFs.
- **Contextual Insights:** Relate the information to the context provided by the PDFs, making connections and highlighting relevant points.
- **Examples and Explanations:** Include specific examples, detailed explanations, and any relevant data or findings from the PDFs.
- **Conclusion:** Summarize the key points and provide a well-rounded conclusion based on the analysis.

**Example Output:**

# Overview
The provided PDFs offer a comprehensive overview of ...

# In-Depth Analysis
Based on the documents, the key findings include ...

# Contextual Insights
The analysis reveals that ...

# Examples and Explanations
For instance, document A highlights ...

# Conclusion
In conclusion, the analysis demonstrates ...

**Your Response:**
[/INST] {input}
Context: {context}
"""
    ),
    # Condensed Markdown summary of the retrieved document content.
    "Summary": PromptTemplate.from_template(
        """
[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.

**Instructions:**
- Analyze the provided context and input carefully.
- Identify and highlight the key points, main arguments, and important details.
- Format the summary using Markdown for clarity:
  - Use `#` for main headers and `##` for subheaders.
  - Use `**text**` for important terms or concepts.
- Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
- Ensure the summary is easy to read and understand, avoiding unnecessary jargon.

**Example Summary Format:**

# Overview
**Document Title:** *Technical Analysis Report*

**Summary:**
The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...

# Key Findings
- **Finding 1:** Description of finding 1.
- **Finding 2:** Description of finding 2.

# Conclusion
The analysis highlights the significant advancements and future directions for AI technology.

**Your Response:**
[/INST] {input}
Context: {context}
"""
    ),
    # Flowing, essay-style prose (explicitly no bullet points or lists).
    "Essays Expert": PromptTemplate.from_template(
        """
[INST] Your task is to compose a detailed and engaging essay on the provided topic. Begin by thoroughly examining the context derived from PDFs uploaded by the user, along with the given input. Your essay should be seamlessly structured, starting with an engaging introduction that sets the stage and highlights the significance of the topic. Follow with a comprehensive body where you delve into the subject matter, offering in-depth analysis, relevant examples, and detailed explanations. Conclude with a reflective summary that captures the essence of your discussion and considers potential future implications or directions.

Ensure that your essay flows continuously and cohesively, avoiding the use of bullet points or lists. Construct your writing with smooth transitions and connected sentences, employing clear and descriptive language to effectively convey your insights and findings.

For example, if addressing recent developments in artificial intelligence, you should explore how advancements are transforming various sectors and influencing societal interactions. Discuss the implications of technological progress in machine learning and natural language processing on business practices and everyday life. Your conclusion should provide thoughtful reflections on the future trajectory of AI and its broader implications.

**Your Response:**
[/INST] Context derived from PDFs uploaded by the user: {context} {input}
"""
    ),
    # Heavily formatted technical response (headers, tables, code blocks).
    "Technical": PromptTemplate.from_template(
        """
[INST] You are a highly skilled AI assistant in technical document summarization. Your task is to provide a detailed and well-organized response using Markdown formatting. The response should be informative and structured, presenting data and information in a clear manner.

**Instructions:**
- Analyze the provided context and input comprehensively.
- Use Markdown to structure the response effectively:
  - Employ `#`, `##`, `###` headers for different sections.
  - Use `**text**` to emphasize key points.
  - Include relevant links, code blocks, and tables if applicable.
- Ensure that each section of the response flows logically and that the information is presented clearly.
- Indicate if any critical information is missing and provide a structured layout for easy readability.

**Markdown Formatting Guide:**
- Headers: Use `#` for main headings, `##` for subheadings, and `###` for detailed subheadings.
- Bold Text: Use `**text**` to highlight important terms or concepts.
- Italic Text: Use `*text*` for emphasis.
- Bulleted Lists: Use `-` or `*` for unordered lists where necessary.
- Numbered Lists: Use `1.`, `2.` for ordered lists when appropriate.
- Links: Include `[link text](URL)` to provide additional resources or references.
- Code Blocks: Use triple backticks (```) for code snippets.
- Tables: Use `|` to organize data into tables for clarity.

**Example Output:**

## Introduction
The document provides a thorough analysis of ...

## Key Details
- **Aspect 1:** Detailed description of aspect 1.
- **Aspect 2:** Detailed description of aspect 2.

## Analysis
The analysis reveals ...

## Conclusion
The summary highlights the significance of ...

**Your Response:**
[/INST] {input}
Context: {context}
"""
    ),
    # Add more prompts as needed
}
136 |
--------------------------------------------------------------------------------
/app/routes.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from flask import Flask, request, jsonify, render_template, Blueprint, send_from_directory
4 | from langchain_community.llms import Ollama
5 | from langchain_community.vectorstores import Chroma
6 | from langchain.text_splitter import RecursiveCharacterTextSplitter
7 | from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
8 | from langchain_community.document_loaders import PDFPlumberLoader
9 | from langchain.chains.combine_documents import create_stuff_documents_chain
10 | from langchain.chains import create_retrieval_chain
11 | from langchain_core.messages import HumanMessage, AIMessage
12 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
13 | from langchain.chains.history_aware_retriever import create_history_aware_retriever
14 | from .prompts import PROMPTS
15 |
16 | import os
17 | import shutil
18 | import hashlib
19 | import logging
20 |
# Define the blueprint that every route in this module attaches to.
bp = Blueprint('main', __name__)

# Folder paths, relative to the process working directory.
folder_path = "data/db"  # Chroma vector-store persistence directory
pdf_dir = "data/pdf"     # uploaded PDF files

# Absolute path to the PDF directory, used when serving PDFs for viewing.
PDF_DIRECTORY = os.path.join(os.path.dirname(__file__), '..', 'data', 'pdf')

# In-memory usage/statistics state. NOTE(review): module-level, so shared
# across requests and lost on every process restart.
pdf_usage_count = {}    # cumulative retrieval hits per PDF source
query_usage_count = {}  # per-query hits (handlers shadow this with locals)
chat_history = []       # conversation turns as HumanMessage/AIMessage

# Initialize the Ollama model (assumes a local Ollama server with the
# "llama3.1" model available — TODO confirm deployment).
cached_llm = Ollama(model="llama3.1")

# Initialize the embedding model used for both indexing and querying.
embedding = FastEmbedEmbeddings()

# Initialize text splitter for chunking extracted PDF text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2048,    # Increased chunk size for better context
    chunk_overlap=100,  # Increased overlap to maintain context between chunks
    length_function=len,
    is_separator_regex=False
)
49 |
def initialize_vector_store():
    """Ensure the persistence directory exists and open the Chroma store.

    Returns:
        Chroma | None: the opened store, or None when initialisation fails.
    """
    os.makedirs(folder_path, exist_ok=True)
    try:
        store = Chroma(persist_directory=folder_path, embedding_function=embedding)
        # A freshly created store reports no content; warn so the operator
        # knows queries will return nothing until PDFs are uploaded.
        if not store.get():
            print("Vector store is empty or not properly initialized.")
        return store
    except Exception as e:
        print(f"Error initializing vector store: {str(e)}")
        return None
64 |
# Eagerly create/validate the vector store at import time. The return value
# is intentionally discarded; request handlers re-open the store on demand.
initialize_vector_store()

# Sanity check: initialize_vector_store() should have created the folder.
if not os.path.exists(folder_path):
    print(f"Error: Directory {folder_path} does not exist.")
69 |
def file_exists(file_path):
    """Return True when *file_path* names an existing regular file."""
    return os.path.isfile(file_path)
72 |
def compute_file_hash(file):
    """Return the hex MD5 digest of a readable binary file object.

    Reads in 4 KiB blocks and rewinds the stream afterwards so the caller
    can re-read it (e.g. to save the upload to disk).
    """
    digest = hashlib.md5()
    while True:
        block = file.read(4096)
        if block == b"":  # same sentinel the original iter() used
            break
        digest.update(block)
    file.seek(0)  # Reset file pointer
    return digest.hexdigest()
80 |
81 |
# Ensure both persistence directories exist at import time so later file
# operations (uploads, vector-store writes) do not fail on a fresh checkout.
if not os.path.exists(folder_path):
    os.makedirs(folder_path)

if not os.path.exists(pdf_dir):
    os.makedirs(pdf_dir)
87 |
def preprocess_text(text):
    """Normalise raw PDF text: trim surrounding whitespace, flatten
    newlines to single spaces, and drop carriage returns."""
    cleaned = text.strip()
    cleaned = cleaned.replace('\n', ' ')
    return cleaned.replace('\r', '')
92 |
class Document:
    """Minimal container pairing extracted page text with its metadata.

    Mirrors the interface the text splitter and vector store expect:
    a .page_content string and a .metadata dict.
    """

    def __init__(self, page_content, metadata=None):
        self.page_content = page_content
        # Falsy metadata (including None) collapses to a fresh dict, which
        # also avoids the shared-mutable-default pitfall.
        self.metadata = metadata or {}
97 |
@bp.route('/')
def home():
    """Render the main query/chat page."""
    return render_template('index.html')
101 |
@bp.route('/prompts', methods=['GET'])
def get_prompts():
    """Expose the available prompt templates as JSON.

    PromptTemplate objects are not JSON-serialisable, so each template is
    rendered through str() first.
    """
    try:
        as_text = {name: str(template) for name, template in PROMPTS.items()}
        return jsonify(as_text)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
110 |
111 |
112 | @bp.route("/pdfManagement")
113 | def pdfManagement():
114 | try:
115 | # Fetch the PDF and document statistics
116 | pdf_files = os.listdir("data/pdf") # Adjust the path as needed
117 | vector_store = Chroma(persist_directory="data/db", embedding_function=embedding)
118 | db_data = vector_store.get()
119 | document_count = len(db_data.get("metadatas", []))
120 |
121 | return render_template("pdfManagement.html", pdf_count=len(pdf_files), doc_count=document_count)
122 | except Exception as e:
123 | return jsonify({"error": str(e)}), 500
124 |
@bp.route("/ai", methods=["POST"])
def aiPost():
    """Answer a free-form query with the LLM directly (no PDF context).

    Expects JSON: {"query": "..."}; returns {"answer": "..."}.
    """
    # FIX: removed the dead local `query_usage_count = {}` — it was never
    # read and only shadowed the module-level counter.
    print("POST /ai called")
    json_content = request.json or {}  # tolerate an empty/missing body
    query = json_content.get("query")

    if not query:
        return jsonify({"error": "No 'query' found in JSON request"}), 400

    print(f"query: {query}")

    response = cached_llm.invoke(query)
    print(response)

    return jsonify({"answer": response})
143 |
@bp.route("/ask_pdf", methods=["POST"])
def askPDFPost():
    """Answer a query with retrieval-augmented generation over uploaded PDFs.

    Expects JSON: {"query": "...", "promptType": "<key in PROMPTS>"}.
    Returns the answer, the source chunks used, and per-PDF usage
    statistics (cumulative in `pdf_usage_count`, per-request in
    `query_usage`), plus a disclaimer when no sources contributed.
    """
    # Per-request usage tracker; the module-level pdf_usage_count stays
    # cumulative across requests.
    query_usage_count = {}
    print("POST /ask_pdf called")

    json_content = request.json
    query = json_content.get("query")
    prompt_type = json_content.get("promptType")  # Get the prompt type

    if not query:
        return jsonify({"error": "No 'query' found in JSON request"}), 400

    print(f"**query**: {query}")
    print(f"**prompt_type**: {prompt_type}")

    # Dynamically select the prompt based on prompt_type
    prompt = PROMPTS.get(prompt_type)
    if not prompt:
        return jsonify({"error": "Unknown prompt type"}), 400

    try:
        print("Loading vector store")
        vector_store = Chroma(persist_directory=folder_path, embedding_function=embedding)
        db_data = vector_store.get()

        if not db_data.get("metadatas"):
            print("Call ended since there are no documents available to process the query.")
            return jsonify({
                "answer": "No documents available to process your query.",
                "disclaimer": "No documents available to process your query. Upload some PDFs to enable document search.",
                "pdf_usage": {},
                "query_usage": {}
            })

        print("Creating retrieval chain")
        retriever = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": 20,
                "score_threshold": 0.1,
            },
        )

        retriever_prompt = ChatPromptTemplate.from_messages(
            [
                MessagesPlaceholder(variable_name="chat_history"),
                ("human", "{input}"),
                (
                    "human",
                    "Given the above conversation, generate a search query to lookup in order to get information relevant to the conversation",
                ),
            ]
        )
        history_aware_retriever = create_history_aware_retriever(
            llm=cached_llm, retriever=retriever, prompt=retriever_prompt
        )
        document_chain = create_stuff_documents_chain(cached_llm, prompt)

        retrieval_chain = create_retrieval_chain(
            history_aware_retriever,
            document_chain,
        )

        # BUG FIX: the retriever prompt declares a "chat_history"
        # placeholder, so it must be supplied on every invocation;
        # previously only "input" was passed.
        result = retrieval_chain.invoke({"input": query, "chat_history": chat_history})
        chat_history.append(HumanMessage(content=query))
        chat_history.append(AIMessage(content=result["answer"]))

        print(chat_history)

        sources = create_context_with_metadata(result.get("context", []))

        # Update cumulative and per-request usage counts.
        for doc in result["context"]:
            pdf_source = doc.metadata.get("source")
            pdf_usage_count[pdf_source] = pdf_usage_count.get(pdf_source, 0) + 1
            query_usage_count[pdf_source] = query_usage_count.get(pdf_source, 0) + 1

        if not sources:
            answer = f"No relevant documents found for the query: {query}. This answer is generated without any PDF context."
        else:
            answer = result["answer"]

        # PERF FIX: totals are computed once; the originals recomputed
        # sum(...) inside the comprehension for every dictionary entry.
        pdf_total = sum(pdf_usage_count.values())
        query_total = sum(query_usage_count.values())
        response_answer = {
            "answer": answer,
            "sources": sources,
            "pdf_usage": {
                pdf: {"count": count, "percentage": (count / pdf_total * 100) if pdf_total > 0 else 0}
                for pdf, count in pdf_usage_count.items()
            },
            "query_usage": {
                pdf: {"count": count, "percentage": (count / query_total * 100) if query_total > 0 else 0}
                for pdf, count in query_usage_count.items()
            },
            "disclaimer": "This answer is not based on any available PDF documents." if not sources else None
        }

        return jsonify(response_answer)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
244 |
245 |
def create_context_with_metadata(documents):
    """Map retrieved documents to JSON-friendly source/content dicts.

    Args:
        documents: objects exposing .metadata (dict) and .page_content (str).

    Returns:
        list[dict]: one {"source", "page_content"} entry per document.
    """
    # DEAD-CODE FIX: the original also built a formatted `context` string
    # for every document that was never used; it is removed here.
    return [
        {
            "source": doc.metadata.get("source", "Unknown"),
            "page_content": doc.page_content,
        }
        for doc in documents
    ]
257 |
@bp.route("/clear_chat_history", methods=["POST"])
def clear_chat_history():
    """Reset the module-level conversation history to an empty list."""
    global chat_history
    # Rebind rather than mutate; handlers read the module global directly.
    chat_history = []
    print("Chat history cleared")
    return jsonify({"status": "Chat history cleared successfully"})
264 |
@bp.route("/clear_db", methods=["POST"])
def clear_db():
    """Wipe the vector store and uploaded PDFs, then reinitialise the store.

    Returns a JSON status on success, or HTTP 500 with the error message.
    """
    logging.info("POST /clear_db called")
    global vector_store
    try:
        # Order matters: drop indexed documents first, then the raw PDF
        # files, then bring up a fresh (empty) store.
        clear_vector_store()
        clear_directory(pdf_dir)
        vector_store = initialize_vector_store()
    except Exception as exc:
        logging.error(f"Error during clear_db operation: {str(exc)}")
        return jsonify({"error": str(exc)}), 500
    return jsonify({"status": "Database and files cleared successfully"})
283 |
def clear_directory(directory_path):
    """
    Empty *directory_path*: remove it (and all contents) if present, then
    recreate it so it always exists afterwards.

    BUG FIX: the original only recreated the directory when it already
    existed, so a missing directory stayed missing; recreation is now
    unconditional, making "exists and is empty" the guaranteed postcondition.
    """
    if os.path.exists(directory_path):
        shutil.rmtree(directory_path)
    os.makedirs(directory_path, exist_ok=True)
    logging.info(f"Directory cleared: {directory_path}")
292 |
def clear_vector_store():
    """
    Clears all documents from the vector store.

    Re-opens the persisted Chroma collection, deletes every stored ID one
    at a time, and persists the result. Errors are logged and re-raised so
    the caller (clear_db) can surface them.
    """
    try:
        global vector_store
        # Re-open the persisted collection; folder_path and embedding are
        # module-level configuration.
        vector_store = Chroma(persist_directory=folder_path, embedding_function=embedding)

        # Fetch the full collection so we can enumerate stored document IDs.
        db_data = vector_store.get()
        ids = db_data.get("ids", [])

        # Log the number of documents found
        logging.info(f"Found {len(ids)} documents in vector store")

        if ids:
            # Delete one ID at a time. NOTE(review): Chroma's delete() also
            # accepts a list of IDs, which would likely be faster — confirm
            # against the installed Chroma version before changing.
            for doc_id in ids:
                if doc_id:
                    vector_store.delete(doc_id)
                    logging.info(f"Deleted document with ID: {doc_id}")

            # Flush the deletions to disk.
            vector_store.persist()
            logging.info("Successfully deleted all documents and persisted changes.")
        else:
            logging.info("No documents found in vector store to delete.")

    except Exception as e:
        # Log and re-raise so the route handler can return a 500.
        logging.error(f"Error during vector store clearing: {str(e)}")
        raise
325 |
326 |
@bp.route("/list_pdfs", methods=["GET"])
def list_pdfs():
    """Return the filenames currently present in the PDF upload directory."""
    print("GET /list_pdfs called")
    print(f"pdf_dir: {pdf_dir}")
    try:
        pdf_files = os.listdir(pdf_dir)
        # BUG FIX: this value was previously printed *before* it was
        # assigned (and mislabelled "pdf_dir"), raising NameError on
        # every request to this endpoint.
        print(f"pdf_files: {pdf_files}")
        return jsonify({"pdf_files": pdf_files})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
337 |
@bp.route("/pdf", methods=["POST"])
def pdfPost():
    """Upload a PDF, reject duplicates, extract text (OCR fallback),
    chunk it, and index the chunks into the vector store.

    Returns upload metadata: filename, document/chunk counts, and whether
    structured text extraction succeeded (is_structured).
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part in the request"}), 400

    file = request.files['file']

    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    file_name = file.filename
    save_file = os.path.join(pdf_dir, file_name)

    # Reject a duplicate filename outright.
    if file_exists(save_file):
        return jsonify({"error": "File already exists."}), 400

    # Hash the upload so identical content under a different name is
    # also rejected.
    try:
        file_hash = compute_file_hash(file)
    except Exception as e:
        return jsonify({"error": f"Error computing file hash: {str(e)}"}), 500

    existing_files = [f for f in os.listdir(pdf_dir) if f.endswith('.pdf')]
    for existing_file in existing_files:
        existing_file_path = os.path.join(pdf_dir, existing_file)
        try:
            # RESOURCE FIX: the handle was previously opened without ever
            # being closed; the context manager releases it promptly.
            with open(existing_file_path, 'rb') as existing_fh:
                if compute_file_hash(existing_fh) == file_hash:
                    return jsonify({"error": "File with identical content already exists."}), 400
        except Exception as e:
            return jsonify({"error": f"Error checking existing files: {str(e)}"}), 500

    # Persist the upload to disk.
    try:
        file.save(save_file)
    except Exception as e:
        return jsonify({"error": f"Error saving file: {str(e)}"}), 500

    # Try structured text extraction first.
    try:
        loader = PDFPlumberLoader(save_file)
        docs = loader.load_and_split()
        is_structured = True
    except Exception as e:
        print(f"Error loading structured text: {e}")
        docs = []
        is_structured = False

    if not docs:
        # Fall back to OCR for scanned / unstructured PDFs.
        try:
            print("Performing OCR")
            text = perform_ocr(save_file)
            docs = [Document(page_content=preprocess_text(text), metadata={"source": file_name})]
            is_structured = False
        except Exception as e:
            return jsonify({"error": f"Error during OCR processing: {str(e)}"}), 500
    else:
        # Clean extracted text and normalise metadata to the bare filename.
        docs = [Document(page_content=preprocess_text(doc.page_content), metadata={"source": file_name}) for doc in docs]

    try:
        chunks = text_splitter.split_documents(docs)
        print(f"Loaded len={len(chunks)} chunks")

        # Tag every chunk with its originating file so deletion by source
        # (see delete_pdf) can find it later.
        for chunk in chunks:
            chunk.metadata = {"source": file_name}
        global vector_store
        # Index the chunks into the persisted Chroma collection.
        vector_store = Chroma.from_documents(
            documents=chunks, embedding=embedding, persist_directory=folder_path
        )
    except Exception as e:
        return jsonify({"error": f"Error initializing vector store: {str(e)}"}), 500

    response = {
        "status": "Successfully Uploaded",
        "filename": file_name,
        "doc_len": len(docs),
        "chunk_len": len(chunks),
        "is_structured": is_structured
    }
    return jsonify(response)
422 |
423 |
@bp.route("/list_documents", methods=["GET"])
def list_documents():
    """List the source filename recorded for every chunk in the vector store.

    The store keeps one metadata entry per *chunk*, so a single PDF
    typically appears multiple times in the result.
    """
    try:
        # LOG FIX: the original printed "GET /list_pdfs called" from this
        # endpoint, printed folder_path under the label "pdf_dir", and
        # repeated the same value twice.
        print("GET /list_documents called")
        print(f"pdf_dir: {pdf_dir}")
        print(f"folder_path: {folder_path}")
        print(f"embedding function: {embedding}")

        if not os.path.exists(folder_path):
            print(f"The directory {folder_path} does not exist. Initializing vector store...")
            os.makedirs(folder_path, exist_ok=True)  # Ensure the folder exists before initializing

        print("Initializing vector store...")
        initialize_vector_store()
        try:
            vector_store = Chroma(persist_directory=folder_path, embedding_function=embedding)
        except Exception as init_error:
            print(f"Failed to initialize vector store: {init_error}")
            return jsonify({"error": "Failed to initialize vector store"}), 500
        print("Vector store initialized.")

        print("Fetching data from vector store...")
        try:
            db_data = vector_store.get()
        except Exception as fetch_error:
            print(f"Failed to fetch data from vector store: {fetch_error}")
            return jsonify({"error": "Failed to fetch data from vector store"}), 500

        if not db_data or "metadatas" not in db_data or not db_data["metadatas"]:
            print("No documents found in vector store.")
            return jsonify({"message": "No documents found"}), 200

        documents = [{"source": metadata.get("source", "Unknown")} for metadata in db_data["metadatas"]]
        print("Documents extracted:", documents)
        return jsonify({"documents": documents}), 200

    except Exception as e:
        print(f"Error listing documents: {e}")
        return jsonify({"error": "An error occurred while listing documents. Please try again later."}), 500
466 |
@bp.route("/delete_pdf", methods=["POST"])
def delete_pdf():
    """Delete a PDF file and every vector-store chunk indexed from it.

    Expects JSON: {"file_name": "..."}. Returns 404 when no indexed
    chunks match, or when the file itself is missing after deletion of
    its chunks.
    """
    json_content = request.json
    file_name = json_content.get("file_name")

    if not file_name:
        return jsonify({"error": "No 'file_name' found in JSON request"}), 400

    try:
        # Construct the on-disk path of the uploaded file.
        file_path = os.path.join(pdf_dir, file_name)
        print(f"Attempting to delete file at: {file_path}")

        # Chunks store the bare filename as their "source"; normalise once.
        path_pattern = file_name.strip().lower()
        print(f"Looking for documents with source: {path_pattern}")

        # Remove the PDF references from the vector store.
        vector_store = Chroma(persist_directory=folder_path, embedding_function=embedding)

        db_data = vector_store.get()
        print(f"db_data: {db_data}")
        metadatas = db_data.get("metadatas", [])
        ids = db_data.get("ids", [])

        print(f"Found {len(metadatas)} documents in vector store")
        if metadatas:
            print(f"Sample metadata: {metadatas[0]}")

        # BUG FIX: metadata.get("source") can be None, and the original
        # called .strip() on it unconditionally, raising AttributeError
        # for any chunk that lacks a source entry.
        docs_to_delete = [
            doc_id
            for doc_id, metadata in zip(ids, metadatas)
            if (metadata.get("source") or "").strip().lower() == path_pattern
        ]
        print(f"Documents to delete: {docs_to_delete}")

        if docs_to_delete:
            for doc_id in docs_to_delete:
                if doc_id is None:
                    print("Encountered None as doc_id, skipping deletion.")
                    continue
                print(f"Deleting document with ID: {doc_id}")
                vector_store.delete(doc_id)

            # Persist changes to the vector store.
            vector_store.persist()
            print("Successfully deleted documents and persisted changes.")
        else:
            print(f"No documents found for source: {path_pattern}")
            return jsonify({"status": "No documents found for the provided file name"}), 404

        # Remove the file itself.
        if os.path.exists(file_path):
            os.remove(file_path)
            print(f"Successfully deleted file: {file_path}")
        else:
            print(f"File not found: {file_path}")
            return jsonify({"error": "File not found"}), 404

        return jsonify({"status": "success"})
    except Exception as e:
        print(f"Error during deletion: {str(e)}")
        return jsonify({"error": str(e)}), 500
529 |
@bp.route('/pdfs/<filename>')
def serve_pdf(filename):
    """Serve an uploaded PDF by filename.

    send_from_directory rejects paths escaping PDF_DIRECTORY, guarding
    against path traversal.
    """
    # BUG FIX(review): the route previously read '/pdfs/' with no
    # <filename> placeholder, so Flask could never bind the view argument;
    # confirm this wasn't a placeholder lost in a copy/paste.
    return send_from_directory(PDF_DIRECTORY, filename)
533 |
@bp.route("/delete_document", methods=["POST"])
def delete_document():
    """Delete a single vector-store entry by its document ID."""
    print("POST /delete_document called")
    payload = request.json
    doc_id = payload.get("doc_id")

    if not doc_id:
        return jsonify({"error": "No 'doc_id' found in JSON request"}), 400

    try:
        store = Chroma(persist_directory=folder_path, embedding_function=embedding)
        store.delete(doc_id)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    return jsonify({"status": "Document deleted successfully"})
549 |
def perform_ocr(pdf_path):
    """Placeholder OCR hook.

    A real implementation would rasterise *pdf_path* and run an OCR engine
    such as pytesseract; for now a fixed string is returned.
    """
    return "OCR processed text from PDF"
554 |
@bp.route("/pdf_usage", methods=["GET"])
def get_pdf_usage():
    """Report cumulative per-PDF retrieval counts with percentage shares."""
    try:
        total_queries = sum(pdf_usage_count.values())
        pdf_influence = {}
        for pdf, count in pdf_usage_count.items():
            share = (count / total_queries * 100) if total_queries > 0 else 0
            pdf_influence[pdf] = {"count": count, "percentage": share}
        return jsonify({"pdf_usage": pdf_influence})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
564 |
# Configure logging: append INFO-and-above records to app.log.
# NOTE(review): this runs at the *bottom* of the module, after the
# import-time code above has already made logging calls — confirm
# whether those earlier records are intended to reach app.log.
logging.basicConfig(level=logging.INFO, filename='app.log', filemode='a')
567 |
--------------------------------------------------------------------------------
/app/static/css/style.css:
--------------------------------------------------------------------------------
1 | /* Root Variables */
2 | :root {
3 | --primary-color: #007bff;
4 | --secondary-color: #0056b3;
5 | --highlight-color: #003d7a;
6 | --text-color: #333;
7 | --background-color: #f4f4f4;
8 | --card-background: #fff;
9 | --border-color: #ddd;
10 | --border-radius: 8px;
11 | --box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
12 | --transition-duration: 0.3s;
13 | }
14 |
15 | /* General Styles */
16 | body {
17 | font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
18 | margin: 0;
19 | padding: 0;
20 | background-color: var(--background-color);
21 | color: var(--text-color);
22 | }
23 |
24 | .container {
25 | max-width: 1200px;
26 | margin: 0 auto;
27 | padding: 20px;
28 | }
29 |
30 |
31 | /* Header Styles */
32 | header {
33 | background-color: var(--primary-color);
34 | color: #fff;
35 | padding: 10px 10px;
36 | text-align: center;
37 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
38 | border-bottom: 5px solid #0056b3;
39 | margin-bottom: 20px;
40 | }
41 |
42 | .title-wrapper {
43 | position: relative;
44 | display: inline-block;
45 | }
46 |
47 | header h1 {
48 | margin: 0;
49 | font-size: 2.5em;
50 | font-weight: 700;
51 | color: #ffffff;
52 | }
53 |
54 | .ai-powered {
55 | position: absolute;
56 | top: 100%;
57 | left: 101%;
58 | transform: translateX(-50%);
59 | font-weight: normal;
60 | color: #FFFFFF;
61 | background-color: transparent;
62 | border-radius: 5px;
63 | font-size: 13px;
64 | margin-top: -10px;
65 | white-space: nowrap;
66 | }
67 |
68 | header h2 {
69 | font-size: 20px;
70 | color: #ffffff;
71 | margin-top: 20px;
72 | }
73 |
74 | /* Media Query for Smaller Screens */
75 | @media (max-width: 768px) {
76 | .ai-powered {
77 | position: static;
78 | transform: none;
79 | margin-top: 10px;
80 | white-space: normal; /* Allow text to wrap */
81 | display: block;
82 | text-align: center;
83 | }
84 |
85 | .title-wrapper {
86 | text-align: center;
87 | }
88 | }
89 |
90 | /* Navbar Styles */
91 | .navbar {
92 | background-color: var(--secondary-color);
93 | padding: 10px 0;
94 | }
95 |
96 | .navbar .container {
97 | display: flex;
98 | align-items: center;
99 | justify-content: space-between; /* Space between logo and links */
100 | }
101 |
102 | .navbar-logo {
103 | flex-shrink: 0; /* Prevent logo from shrinking */
104 | }
105 |
106 | .header-image {
107 | width: 70px; /* Adjust as needed */
108 | height: auto; /* Maintain aspect ratio */
109 | margin-right: 10px; /* Space between the image and text */
110 | }
111 |
112 | .navbar-links {
113 | display: flex;
114 | gap: 15px; /* Space between nav links */
115 | }
116 |
117 | .nav-link {
118 | color: #fff;
119 | text-decoration: none;
120 | padding: 10px 20px;
121 | font-size: 18px;
122 | transition: background-color var(--transition-duration), color var(--transition-duration);
123 | }
124 |
125 | .nav-link:hover {
126 | background-color: var(--highlight-color);
127 | color: #ffd700;
128 | }
129 |
130 | /* Card Styles */
131 | .card {
132 | background: var(--card-background);
133 | border-radius: var(--border-radius);
134 | box-shadow: var(--box-shadow);
135 | padding: 20px;
136 | margin-bottom: 20px;
137 | }
138 |
139 | /* Section Styles */
140 | .section-group {
141 | display: flex;
142 | flex-wrap: wrap;
143 | gap: 20px;
144 | }
145 |
146 | .section {
147 | flex: 1;
148 | min-width: calc(50% - 20px);
149 | }
150 |
151 | h2 {
152 | color: var(--text-color);
153 | margin-top: 0;
154 | }
155 |
156 | /* Button Styles */
157 | button {
158 | padding: 10px 20px;
159 | background: var(--primary-color);
160 | color: #fff;
161 | border: none;
162 | cursor: pointer;
163 | border-radius: 5px;
164 | font-size: 16px;
165 | transition: background-color var(--transition-duration), transform var(--transition-duration);
166 | }
167 |
168 | button:hover {
169 | background: var(--secondary-color);
170 | transform: scale(1.05);
171 | }
172 |
173 | /* Input Styles */
174 | input[type="file"], input[type="text"] {
175 | width: 100%;
176 | padding: 10px;
177 | margin: 10px 0;
178 | box-sizing: border-box;
179 | border: 1px solid var(--border-color);
180 | border-radius: 5px;
181 | }
182 |
183 | /* List Styles */
184 | ul, ol {
185 | padding: 0;
186 | margin: 0;
187 | list-style-position: inside;
188 | }
189 |
190 | li {
191 | background: var(--card-background);
192 | margin: 5px 0;
193 | padding: 10px;
194 | border: 1px solid var(--border-color);
195 | border-radius: 5px;
196 | }
197 |
198 | /* Container for response and resize handle */
199 | .response-container {
200 | position: relative;
201 | margin-top: 20px;
202 | width: 100%;
203 | box-sizing: border-box;
204 | }
205 |
206 | /* Response Section */
207 | .response {
208 | padding: 15px;
209 | border: 1px solid var(--border-color);
210 | border-radius: 5px;
211 | background: #fff;
212 | height: 150px;
213 | max-height: 500px;
214 | min-height: 100px;
215 | overflow-y: auto;
216 | box-sizing: border-box;
217 | width: 100%;
218 | }
219 |
220 | /* Resize Handle */
221 | .resize-handle {
222 | width: 20px;
223 | height: 20px;
224 | background: var(--primary-color);
225 | position: absolute;
226 | bottom: 0;
227 | right: 0;
228 | cursor: ns-resize;
229 | z-index: 10;
230 | border-top-left-radius: 5px;
231 | border-top-right-radius: 5px;
232 | box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
233 | }
234 |
235 | /* Loading Spinner */
236 | .spinner {
237 | border: 4px solid rgba(0, 0, 0, 0.1);
238 | border-radius: 50%;
239 | border-top: 4px solid var(--primary-color);
240 | width: 24px;
241 | height: 24px;
242 | animation: spin 1s linear infinite;
243 | margin: 10px auto;
244 | }
245 |
246 | /* Spin Animation */
247 | @keyframes spin {
248 | 0% { transform: rotate(0deg); }
249 | 100% { transform: rotate(360deg); }
250 | }
251 |
252 | /* Loading Message */
253 | .loading-message {
254 | font-size: 16px;
255 | color: var(--primary-color);
256 | text-align: center;
257 | margin: 10px 0;
258 | }
259 |
260 | /* Statistics Section */
261 | .stats {
262 | padding: 10px;
263 | background: #f9f9f9;
264 | border: 1px solid var(--border-color);
265 | border-radius: 5px;
266 | margin-top: 20px;
267 | }
268 |
269 | /* Status Messages */
270 | .status-message {
271 | font-size: 16px;
272 | color: var(--primary-color);
273 | text-align: center;
274 | margin-top: 10px;
275 | }
276 |
277 | /* Fade Out Animation */
278 | .fade-out {
279 | opacity: 0;
280 | transition: opacity 2s ease-out;
281 | }
282 |
283 | /* Button Styles (Additional) */
284 | .view-button {
285 | background-color: #4CAF50;
286 | color: white;
287 | border: none;
288 | padding: 10px 15px;
289 | margin-right: 5px;
290 | border-radius: 5px;
291 | cursor: pointer;
292 | }
293 |
294 | .delete-button {
295 | background-color: #f44336;
296 | color: white;
297 | border: none;
298 | padding: 10px 15px;
299 | border-radius: 5px;
300 | cursor: pointer;
301 | }
302 |
303 | .view-button:hover, .delete-button:hover {
304 | opacity: 0.8;
305 | }
306 |
307 | /* Enhanced Styles for Prompt Container */
308 | #promptContainer {
309 | max-width: 600px;
310 | margin: 20px auto;
311 | padding: 20px;
312 | border: 1px solid var(--border-color);
313 | border-radius: var(--border-radius);
314 | background-color: #f9f9f9;
315 | box-shadow: var(--box-shadow);
316 | }
317 |
318 | #promptContainer p {
319 | font-size: 16px;
320 | color: var(--text-color);
321 | margin-bottom: 20px;
322 | line-height: 1.5;
323 | }
324 |
325 | /* Style for the select dropdown */
326 | #promptType {
327 | width: 100%;
328 | padding: 10px;
329 | font-size: 16px;
330 | border: 1px solid var(--border-color);
331 | border-radius: 4px;
332 | background-color: #fff;
333 | box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.12);
334 | transition: border-color var(--transition-duration) ease;
335 | }
336 |
337 | #promptType:focus {
338 | border-color: var(--primary-color);
339 | outline: none;
340 | box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
341 | }
342 |
343 | /* Container for each PDF item */
344 | .pdf-item {
345 | display: flex;
346 | justify-content: space-between; /* Space between content and buttons */
347 | align-items: center; /* Align items vertically in the center */
348 | margin-bottom: 20px;
349 | padding: 10px;
350 | border: 1px solid var(--border-color);
351 | border-radius: 5px;
352 | background-color: var(--card-background);
353 | }
354 |
355 |
356 |
357 |
358 | /* Button container to hold buttons */
359 | .button-container {
360 | display: flex;
361 | gap: 10px; /* Space between buttons */
362 | }
363 |
/* General button style.
   Consolidated: this selector was declared three times with conflicting
   values (border-radius 4px vs var(--border-radius), two different
   transitions). With equal specificity the cascade keeps only the LAST
   declaration of each property, so this single rule preserves the exact
   effective styling of the original three. */
.button-container .btn {
    padding: 8px 12px;
    font-size: 14px;
    color: #fff;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    transition: background-color 0.3s ease;
}
395 |
396 | /* Specific button styles */
397 | .view-button {
398 | background-color: #007bff;
399 | }
400 |
401 | .view-button:hover {
402 | background-color: #0056b3;
403 | }
404 |
405 | .delete-button {
406 | background-color: #dc3545;
407 | }
408 |
409 | .delete-button:hover {
410 | background-color: #c82333;
411 | }
412 |
413 | #pdfList {
414 | margin: 20px;
415 | }
416 |
417 | /* Accessibility Improvements */
418 | :focus {
419 | outline: 3px solid var(--highlight-color);
420 | outline-offset: 2px;
421 | }
422 |
423 | /* Dark Mode Support */
424 | @media (prefers-color-scheme: dark) {
425 | :root {
426 | --primary-color: #0056b3;
427 | --secondary-color: #007bff;
428 | --highlight-color: #ffd700;
429 | --text-color: #f4f4f4;
430 | --background-color: #333;
431 | --card-background: #444;
432 | --border-color: #555;
433 | }
434 |
435 | body {
436 | background-color: var(--background-color);
437 | color: var(--text-color);
438 | }
439 |
440 | .nav-link, button, .status-message, .loading-message {
441 | color: #fff;
442 | }
443 |
444 | .card, .response, .stats, .status-message, .loading-message, #promptContainer, .pdf-item {
445 | background: var(--card-background);
446 | border-color: var(--border-color);
447 | }
448 |
449 | input[type="file"], input[type="text"], #promptType {
450 | background-color: #555;
451 | color: #fff;
452 | }
453 | }
454 |
455 | /* Smooth Transitions for Hover and Focus */
456 | a, button, .nav-link, input[type="file"], input[type="text"], #promptType {
457 | transition: background-color 0.3s ease, color 0.3s ease, border-color 0.3s ease, box-shadow 0.3s ease;
458 | }
459 |
460 | /* Consistent Spacing and Alignment */
461 | * {
462 | box-sizing: border-box;
463 | }
464 |
465 | .container, .section-group, .button-container, #pdfList {
466 | padding-left: 15px;
467 | padding-right: 15px;
468 | }
469 |
470 | .section-group, .button-container {
471 | justify-content: space-between;
472 | }
473 |
474 | /* Enhancements for Readability */
475 | body, h1, h2, p, .nav-link, button, input, .status-message, .loading-message {
476 | line-height: 1.6;
477 | font-weight: normal;
478 | }
479 |
480 | h1, h2 {
481 | font-weight: bold;
482 | }
483 |
484 | footer {
485 | background-color: #007bff;
486 | color: #fff;
487 | padding: 20px 0;
488 | text-align: center;
489 | border-top: 5px solid #0056b3;
490 | }
491 |
492 | footer a {
493 | color: #ffd700;
494 | text-decoration: none;
495 | }
496 |
497 | footer a:hover {
498 | color: #ffffff;
499 | text-decoration: underline;
500 | }
501 |
502 | .footer-content {
503 | max-width: 1200px;
504 | margin: 0 auto;
505 | padding: 0 20px;
506 | }
507 |
508 | @media (max-width: 768px) {
509 | .footer-content {
510 | text-align: center;
511 | }
512 | }
513 |
514 | .navigation {
515 | display: flex;
516 | justify-content: center;
517 | padding: 20px 0;
518 | }
519 |
520 | .navigation .button {
521 | background-color: #6c757d;
522 | color: #ffffff;
523 | padding: 8px 16px;
524 | text-decoration: none;
525 | border-radius: 4px;
526 | font-weight: normal;
527 | font-size: 0.9rem;
528 | transition: background-color 0.3s, transform 0.2s;
529 | }
530 |
531 | .navigation .button:hover {
532 | background-color: #5a6268;
533 | transform: translateY(-2px);
534 | }
535 |
536 | .app-name {
537 | font-weight: bold;
538 | color: #4CAF50;
539 | margin-right: 10px;
540 | }
541 |
542 |
/* Duplicate .header-image rule removed — an identical declaration block
   (width: 70px; height: auto; margin-right: 10px;) already exists in the
   Navbar section near the top of this stylesheet. */
548 |
549 | .welcome {
550 | font-family: 'Brush Script MT', cursive; /* Example of a handwritten-style font */
551 | font-size: 35px; /* Adjust size as needed */
552 | color: #ffffff; /* Example color, adjust as needed */
553 | margin-right: 5px;
554 | }
555 |
556 | @media (max-width: 768px) {
557 | .pdf-item {
558 | flex-direction: column; /* Stack items vertically on smaller screens */
559 | padding: 15px;
560 | }
561 |
562 | .button-container {
563 | flex-direction: column; /* Stack buttons vertically */
564 | gap: 10px;
565 | justify-content: center; /* Center buttons on smaller screens */
566 | }
567 |
568 | .button-container .btn {
569 | width: 100%; /* Make buttons full width on small screens */
570 | font-size: 16px;
571 | }
572 | }
573 |
574 | @media (max-width: 480px) {
575 | .pdf-item {
576 | padding: 10px;
577 | }
578 |
579 | .button-container .btn {
580 | padding: 10px;
581 | font-size: 14px;
582 | }
583 | }
584 |
--------------------------------------------------------------------------------
/app/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AbdArdati/PDFQueryAI/5448eeb6ebfbf31f881b51227e1a7468518d7449/app/static/favicon.ico
--------------------------------------------------------------------------------
/app/static/images/Home_Example_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AbdArdati/PDFQueryAI/5448eeb6ebfbf31f881b51227e1a7468518d7449/app/static/images/Home_Example_1.png
--------------------------------------------------------------------------------
/app/static/images/Home_Example_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AbdArdati/PDFQueryAI/5448eeb6ebfbf31f881b51227e1a7468518d7449/app/static/images/Home_Example_2.png
--------------------------------------------------------------------------------
/app/static/images/Home_Example_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AbdArdati/PDFQueryAI/5448eeb6ebfbf31f881b51227e1a7468518d7449/app/static/images/Home_Example_3.png
--------------------------------------------------------------------------------
/app/static/images/PDF_Management_Eaxmple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AbdArdati/PDFQueryAI/5448eeb6ebfbf31f881b51227e1a7468518d7449/app/static/images/PDF_Management_Eaxmple.png
--------------------------------------------------------------------------------
/app/static/images/full_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AbdArdati/PDFQueryAI/5448eeb6ebfbf31f881b51227e1a7468518d7449/app/static/images/full_logo.png
--------------------------------------------------------------------------------
/app/static/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AbdArdati/PDFQueryAI/5448eeb6ebfbf31f881b51227e1a7468518d7449/app/static/images/logo.png
--------------------------------------------------------------------------------
/app/static/js/home.js:
--------------------------------------------------------------------------------
// Make an API request and return the parsed JSON response.
// Throws (after logging) on HTTP errors or unparsable bodies.
async function apiRequest(url, options = {}) {
    try {
        const response = await fetch(url, options);

        // Read the body exactly ONCE. A Response body is a one-shot stream:
        // the original code called response.text() inside the JSON-error
        // handler AFTER response.json() had already consumed the body, which
        // always threw "body already read" instead of the intended message.
        const bodyText = await response.text();

        // Check if the response status is not OK
        if (!response.ok) {
            throw new Error(`Failed to fetch from ${url}. Status: ${response.status}. Response: ${bodyText}`);
        }

        // Try parsing JSON, if applicable
        try {
            return JSON.parse(bodyText);
        } catch (jsonError) {
            throw new Error(`Failed to parse JSON response from ${url}. Response: ${bodyText}`);
        }
    } catch (error) {
        console.error('API request error:', error.message);
        throw error; // Re-throw error to be handled by the calling function
    }
}
23 |
// Copy the PDF-derived text into the AI query box, then submit the query.
async function copyTextAndSubmit() {
    const sourceField = document.getElementById('queryPDF');
    const targetField = document.getElementById('query');
    targetField.value = sourceField.value;
    await askAI(); // askAI is defined later in this file
}
30 |
31 | // Function to ask AI a question
32 | async function askAI() {
33 | console.log('askAI called'); // Debug log
34 | const query = document.getElementById('query').value;
35 | const responseDiv = document.getElementById('queryResponseAI');
36 |
37 | if (!query) {
38 | alert('Please enter a query.');
39 | return;
40 | }
41 |
42 | responseDiv.innerHTML = '