├── images ├── rag.png ├── learn.png ├── IRAG-V.gif ├── answer.png ├── chunkviz.png ├── forget.png ├── mod_rag.png ├── no_synth.png ├── question.png ├── RAG-chunks.png ├── change_rag.png ├── chunkviz-1.png ├── embeddings.jpg ├── embeddings.png ├── llm_agent.png ├── mod_rag-2.png ├── rag-agent.png ├── reset_chat.png ├── search-00.png ├── search-01.png ├── search-02.png ├── search-03.png ├── with_synth.png ├── add_sources.png ├── ask_question.png ├── list_sources.png ├── mdb_diagram.png ├── remove_source.png ├── scale_tools.png ├── search-01-1.png ├── search-02-1.png ├── answer_refined.png ├── remove_sources.png ├── actionweaver_mdb.png ├── function_calling.jpeg └── function_calling.png ├── irag-2025.png ├── irag-chunk-mgmt.png ├── .gitignore ├── score-fusion.md ├── implementation.md ├── rag ├── static │ ├── styles.css │ └── script.js ├── templates │ └── index.html └── app.py ├── README.md ├── LICENSE ├── chunk.md └── blog.md /images/rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/rag.png -------------------------------------------------------------------------------- /irag-2025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/irag-2025.png -------------------------------------------------------------------------------- /images/learn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/learn.png -------------------------------------------------------------------------------- /images/IRAG-V.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/IRAG-V.gif -------------------------------------------------------------------------------- /images/answer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/answer.png -------------------------------------------------------------------------------- /images/chunkviz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/chunkviz.png -------------------------------------------------------------------------------- /images/forget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/forget.png -------------------------------------------------------------------------------- /images/mod_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/mod_rag.png -------------------------------------------------------------------------------- /images/no_synth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/no_synth.png -------------------------------------------------------------------------------- /images/question.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/question.png 
-------------------------------------------------------------------------------- /irag-chunk-mgmt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/irag-chunk-mgmt.png -------------------------------------------------------------------------------- /images/RAG-chunks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/RAG-chunks.png -------------------------------------------------------------------------------- /images/change_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/change_rag.png -------------------------------------------------------------------------------- /images/chunkviz-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/chunkviz-1.png -------------------------------------------------------------------------------- /images/embeddings.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/embeddings.jpg -------------------------------------------------------------------------------- /images/embeddings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/embeddings.png -------------------------------------------------------------------------------- /images/llm_agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/llm_agent.png -------------------------------------------------------------------------------- /images/mod_rag-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/mod_rag-2.png -------------------------------------------------------------------------------- /images/rag-agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/rag-agent.png -------------------------------------------------------------------------------- /images/reset_chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/reset_chat.png -------------------------------------------------------------------------------- /images/search-00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-00.png -------------------------------------------------------------------------------- /images/search-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-01.png -------------------------------------------------------------------------------- /images/search-02.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-02.png -------------------------------------------------------------------------------- /images/search-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-03.png -------------------------------------------------------------------------------- /images/with_synth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/with_synth.png -------------------------------------------------------------------------------- /images/add_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/add_sources.png -------------------------------------------------------------------------------- /images/ask_question.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/ask_question.png -------------------------------------------------------------------------------- /images/list_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/list_sources.png -------------------------------------------------------------------------------- /images/mdb_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/mdb_diagram.png -------------------------------------------------------------------------------- /images/remove_source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/remove_source.png -------------------------------------------------------------------------------- /images/scale_tools.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/scale_tools.png -------------------------------------------------------------------------------- /images/search-01-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-01-1.png -------------------------------------------------------------------------------- /images/search-02-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-02-1.png -------------------------------------------------------------------------------- /images/answer_refined.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/answer_refined.png -------------------------------------------------------------------------------- /images/remove_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/remove_sources.png -------------------------------------------------------------------------------- 
/images/actionweaver_mdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/actionweaver_mdb.png -------------------------------------------------------------------------------- /images/function_calling.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/function_calling.jpeg -------------------------------------------------------------------------------- /images/function_calling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/function_calling.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore virtual environments 2 | .venv 3 | env/ 4 | venv/ 5 | 6 | # Ignore IDE specific settings 7 | .idea/ 8 | .vscode/ 9 | *.pyc 10 | 11 | # Ignore build files 12 | build/ 13 | dist/ 14 | *.egg-info/ 15 | 16 | # Ignore temporary files 17 | *.pyo 18 | *.swp 19 | *~ 20 | 21 | # Ignore test coverage reports 22 | .coverage 23 | 24 | # Ignore local configurations 25 | .env 26 | 27 | # Ignore documentation generated files 28 | docs/_build/ 29 | 30 | # Ignore mypy and pytype cache 31 | .mypy_cache/ 32 | .dmypy.json 33 | dmypy.json 34 | .pyre/ 35 | 36 | # Ignore Cython debug symbols 37 | cython_debug/ 38 | 39 | # Ignore pytest cache 40 | .pytest_cache/ 41 | 42 | # Ignore pipenv specific files 43 | Pipfile.lock -------------------------------------------------------------------------------- /score-fusion.md: -------------------------------------------------------------------------------- 1 | ## Relative Score Fusion for Enhanced Search Results 2 | 3 | This blog post explores a method for combining the power of vector search and full-text search through relative score fusion. This approach utilizes the Sphere dataset, a large corpus for knowledge-intensive NLP tasks. 4 | 5 | **The Scenario:** 6 | 7 | We aim to search for information about companies from the "names" list using both vector search and full-text search. Vector search leverages sentence embeddings for semantic similarity, while full-text search focuses on keyword matching. 8 | 9 | **The Approach:** 10 | 11 | 1. **Data Setup:** 12 | - Sentence embeddings are generated for each company name using the Facebook DPR question encoder model. 13 | - MongoDB collections are used to store the data and facilitate queries. 14 | 2. **Pipeline Breakdown:** 15 | - **Vector Search:** 16 | - The "$vectorSearch" aggregation operator searches for documents with similar vector representations. 17 | - The retrieved documents are assigned a "vs_score" based on their search score and scaled using pre-defined parameters. 18 | - **Full Text Search:** 19 | - The "$search" operator performs full-text search based on the company name. 20 | - Matching documents are assigned an "fts_score" and scaled similarly. 21 | - **Relative Score Fusion:** 22 | - Both sets of results are combined using "$unionWith". 23 | - The "$group" operator aggregates the maximum scores for each document across both search methods. 24 | - The final score is calculated by adding the scaled "vs_score" and "fts_score" for each document. 
25 | - The results are then sorted by the final score in descending order, presenting the most relevant documents first. 26 | 27 | **CODE:** 28 | 29 | gist available here: https://gist.github.com/hweller1/d6dbd5036ae4366108b534a0f1662a20 30 | 31 | ``` 32 | vector_agg_with_lookup = [ 33 | { 34 | "$vectorSearch": { 35 | "index": "vector", 36 | "path": "vector", 37 | "queryVector": embedding.tolist(), 38 | "numCandidates": k * overrequest_factor, 39 | "limit": k * 2 40 | } 41 | }, 42 | {"$addFields": {"vs_score": {"$meta": "searchScore"}}}, 43 | { 44 | "$project": { 45 | "vs_score": {"$multiply": ["$vs_score", vector_scalar / vector_normalization]}, 46 | "_id": 1, 47 | "raw": 1, 48 | } 49 | }, 50 | { 51 | "$unionWith": { 52 | "coll": "sphere1mm", 53 | "pipeline": [ 54 | { 55 | "$search": { 56 | "index": "fts_sphere", 57 | "text": {"query": query, "path": "raw"}, 58 | } 59 | }, 60 | {"$limit": k * 2}, 61 | {"$addFields": {"fts_score": {"$meta": "searchScore"}}}, 62 | { 63 | "$project": { 64 | "fts_score": {"$multiply": ["$fts_score", fts_scalar / fts_normalization]}, 65 | "_id": 1, 66 | "raw": 1, 67 | } 68 | }, 69 | ], 70 | } 71 | }, 72 | { 73 | "$group": { 74 | "_id": "$raw", 75 | "vs_score": {"$max": "$vs_score"}, 76 | "fts_score": {"$max": "$fts_score"}, 77 | } 78 | }, 79 | { 80 | "$project": { 81 | "_id": 1, 82 | "raw": 1, 83 | "vs_score": {"$ifNull": ["$vs_score", 0]}, 84 | "fts_score": {"$ifNull": ["$fts_score", 0]}, 85 | } 86 | }, 87 | { 88 | "$project": { 89 | "raw": 1, 90 | "score": {"$add": ["$fts_score", "$vs_score"]}, 91 | "_id": 1, 92 | "vs_score": 1, 93 | "fts_score": 1, 94 | } 95 | }, 96 | {"$limit": k}, 97 | {"$sort": {"score": -1}}, 98 | ] 99 | ``` 100 | 101 | **Benefits:** 102 | 103 | This relative score fusion method offers several advantages: 104 | 105 | - **Improved Search Relevance:** By combining vector search and full-text search, the results capture both semantic similarity and keyword relevance, leading to more accurate and comprehensive answers. 106 | - **Flexibility:** The scaling factors for each score can be adjusted to prioritize either vector search or full-text search based on the specific needs and data characteristics. 107 | - **Scalability:** The aggregation framework allows for efficient execution of the search queries even for large datasets. 108 | 109 | **Future Directions:** 110 | 111 | This work opens up exciting possibilities for further exploration: 112 | 113 | - Investigating different score fusion techniques and weighting schemes. 114 | - Integrating the approach with other search methods, such as entity search. 115 | - Adapting the method to different datasets and NLP applications. 116 | 117 | By leveraging relative score fusion, we can unlock the potential of hybrid search for enhanced information retrieval and deeper understanding of complex queries. 118 | -------------------------------------------------------------------------------- /implementation.md: -------------------------------------------------------------------------------- 1 | ## What is an Agent anyway? 2 | 3 | An agent is a computer program or system designed to perceive its environment, make decisions, and achieve specific goals. 4 | 5 | Think of an agent as a software entity that displays some degree of autonomy and performs actions in its environment on behalf of its user or owner, but in a relatively independent way. It takes initiatives to perform actions on its own by deliberating its options to achieve its goal(s). 
The core idea of agents is to use a language model to choose a sequence of actions to take. In contrast to chains, where a sequence of actions is hardcoded, agents use a language model as a reasoning engine to determine which actions to take and in which order. 6 | 7 | # Building an Interactive-RAG Agent 8 | 9 | Using [ActionWeaver](https://github.com/TengHu/ActionWeaver/tree/main), a lightweight wrapper for the function calling API, we can build a user proxy agent that efficiently retrieves and ingests relevant information using MongoDB Atlas. 10 | 11 | A proxy agent is a middleman that sends client requests to other servers or resources and then brings the responses back. 12 | 13 | This agent presents the data to the user in an interactive and customizable manner, enhancing the overall user experience. 14 | 15 | The `UserProxyAgent` has several RAG parameters that can be customized, such as `source_chunk_size` (e.g. 1000), `num_sources` (e.g. 2), `unique` (e.g. True), and `min_rel_score` (e.g. 0.00). 16 | 17 | ``` 18 | class UserProxyAgent: 19 | def __init__(self, logger, st): 20 | # CHUNK RETRIEVAL STRATEGY 21 | self.rag_config = { 22 | "num_sources": 2, 23 | "source_chunk_size": 1000, 24 | "min_rel_score": 0.00, 25 | "unique": True, 26 | } 27 | ``` 28 | 29 | ``` 30 | class RAGAgent(UserProxyAgent): 31 | def __call__(self, text): 32 | text = self.preprocess_query(text) 33 | # PROMPT ENGINEERING HELPS THE LLM TO SELECT THE BEST ACTION/TOOL 34 | agent_rules = f""" 35 | We will be playing a special game. Trust me, you do not want to lose. 36 | 37 | ## RULES 38 | - DO NOT ANSWER DIRECTLY 39 | - ALWAYS USE ONE OF YOUR AVAILABLE ACTIONS/TOOLS. 40 | - PREVIOUS MESSAGES IN THE CONVERSATION MUST BE CONSIDERED WHEN SELECTING THE BEST ACTION/TOOL 41 | - NEVER ASK FOR USER CONSENT TO PERFORM AN ACTION. ALWAYS PERFORM IT ON THE USER'S BEHALF. 42 | Given the following user prompt, select the correct action/tool from your available functions/tools/actions. 43 | 44 | ## USER PROMPT 45 | {text} 46 | ## END USER PROMPT 47 | 48 | SELECT THE BEST TOOL FOR THE USER PROMPT! BEGIN! 49 | """ 50 | self.messages += [{"role": "user", "content": agent_rules + "\n\n## IMPORTANT! REMEMBER THE GAME RULES! DO NOT ANSWER DIRECTLY! IF YOU ANSWER DIRECTLY YOU WILL LOSE. BEGIN!"}] 51 | if ( 52 | len(self.messages) > 2 53 | ): 54 | # if we have more than 2 messages, we may run into: 'code': 'context_length_exceeded' 55 | # we only need the last few messages to know which source to add or remove 56 | response = self.llm.create( 57 | messages=self.messages[-2:], 58 | actions=[ 59 | self.read_url, 60 | self.answer_question, 61 | self.remove_source, 62 | self.reset_messages, 63 | self.show_messages, 64 | self.iRAG, 65 | self.get_sources_list, 66 | self.search_web 67 | ], 68 | stream=False, 69 | ) 70 | else: 71 | response = self.llm.create( 72 | messages=self.messages, 73 | actions=[ 74 | self.read_url, 75 | self.answer_question, 76 | self.remove_source, 77 | self.reset_messages, 78 | self.show_messages, 79 | self.iRAG, 80 | self.get_sources_list, 81 | self.search_web 82 | ], 83 | stream=False, 84 | ) 85 | return response 86 | ``` 87 | 88 | ## Why Choose ActionWeaver? 89 | Here are some key benefits that influenced our decision to choose ActionWeaver: 90 | 1. Lightweight and Single-Purposed: ActionWeaver is very lightweight and designed with a singular focus on building LLM applications with function calling. This specialization ensures that it excels in its core function without unnecessary complexity. 91 | 2. 
Ease of Use: ActionWeaver streamlines the process of integrating external tools into agent's toolkit. Using a simple decorator, developers can effortlessly add any Python function, and it also provides the flexibility to include tools from other ecosystems like LangChain or Llama Index. 92 | 3. Versatility: Despite its simplicity, ActionWeaver offers a wide range of capabilities, including support for forced function execution, parallel function calling and structured data extraction. Such versatility makes it a Swiss Army knife, equipped to handle a variety of AI-related tasks and adapt seamlessly to changing project demands. 93 | 4. Minimal Dependency: ActionWeaver has minimal dependencies, relying only on the openai and pydantic libraries. This reduces the overhead of managing dependencies. 94 | 5. Complex Function Orchestration: The framework empowers us to create intricate sequences of function calls, allowing us to build complex hierarchies or chains of functions. This capability enables us to execute sophisticated workflows with ease. 95 | 96 | ## Key features of OpenAI function calling: 97 | - Function calling allows you to connect large language models to external tools. 98 | - The Chat Completions API generates JSON that can be used to call functions in your code. 99 | - The latest models have been trained to detect when a function should be called and respond with JSON that adheres to the function signature. 100 | - Building user confirmation flows is recommended before taking actions that impact the world on behalf of users. 101 | - Function calling can be used to create assistants that answer questions by calling external APIs, convert natural language into API calls, and extract structured data from text. 102 | - The basic sequence of steps for function calling involves calling the model, parsing the JSON response, calling the function with the provided arguments, and summarizing the results back to the user. 103 | - Function calling is supported by specific model versions, including gpt-4 and gpt-3.5-turbo. 104 | - Parallel function calling allows multiple function calls to be performed together, reducing round-trips with the API. 105 | - Tokens are used to inject functions into the system message and count against the model's context limit and billing. 106 | 107 | ![](./images/function_calling.png) 108 | 109 | Read more at: https://thinhdanggroup.github.io/function-calling-openai/ 110 | 111 | ## ActionWeaver Basics: Actions 112 | 113 | Actions are functions that an agent can invoke. There are two important design considerations around actions: 114 | 115 | Giving the agent access to the right actions 116 | Describing the actions in a way that is most helpful to the agent 117 | 118 | ## Crafting Actions for Effective Agents 119 | 120 | **Actions are the lifeblood of an agent's decision-making.** They define the options available to the agent and shape its interactions with the environment. Consequently, designing effective actions is crucial for building successful agents. 121 | 122 | **Two key considerations guide this design process:** 123 | 124 | 1. **Access to relevant actions:** Ensure the agent has access to actions necessary to achieve its objectives. Omitting critical actions limits the agent's capabilities and hinders its performance. 125 | 2. **Action description clarity:** Describe actions in a way that is informative and unambiguous for the agent. Vague or incomplete descriptions can lead to misinterpretations and suboptimal decisions. 
126 | 127 | **By carefully designing actions that are both accessible and well-defined, you equip your agent with the tools and knowledge necessary to navigate its environment and achieve its objectives.** 128 | 129 | **Further considerations:** 130 | 131 | * **Granularity of actions:** Should actions be high-level or low-level? High-level actions offer greater flexibility but require more decision-making, while low-level actions offer more control but limit adaptability. 132 | * **Action preconditions and effects:** Clearly define the conditions under which an action can be taken and its potential consequences. This helps the agent understand the implications of its choices. 133 | 134 | 135 | If you don't give the agent the right actions and describe them in an effective way, you won’t be able to build a working agent. 136 | 137 | ![](./images/llm_agent.png) 138 | 139 | An LLM is then called, resulting in either a response to the user OR action(s) to be taken. If it is determined that a response is required, then that is passed to the user, and that cycle is finished. If it is determined that an action is required, that action is then taken, and an observation (action result) is made. That action and its corresponding observation are added back to the prompt (we call this an “agent scratchpad”), and the loop resets, i.e., the LLM is called again (with the updated agent scratchpad). 140 | 141 | ![](./images/scale_tools.png) 142 | 143 | The ActionWeaver agent framework is an AI application framework that puts function calling at its core. It is designed to enable the seamless merging of traditional computing systems with the powerful reasoning capabilities of Large Language Models. 144 | ActionWeaver is built around the concept of LLM function calling, while popular frameworks like LangChain and Haystack are built around the concept of pipelines. 145 | 146 | ## Key features of ActionWeaver include: 147 | - Ease of Use: ActionWeaver allows developers to add any Python function as a tool with a simple decorator. The decorated method's signature and docstring are used as a description and passed to OpenAI's function API. 148 | - Function Calling as First-Class Citizen: Function calling is at the core of the framework. 149 | - Extensibility: Integration of any Python code into the agent's toolbox with a single line of code, including tools from other ecosystems like LangChain or Llama Index. 150 | - Function Orchestration: Building complex orchestrations of function calls, including intricate hierarchies or chains. 151 | - Debuggability: Structured logging improves the developer experience. 
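The "Ease of Use" point above is easiest to see in code. Below is a minimal sketch of registering a plain Python function as an action and passing it to the wrapped client, mirroring the `llm.create(..., actions=[...])` call in the `RAGAgent` shown earlier. The exact import paths (`action`, `wrap`), the `list_sources` function, and the model name are assumptions for illustration rather than a definitive ActionWeaver recipe; check the ActionWeaver README for the API of your installed version.

```
from openai import OpenAI
from actionweaver import action      # assumed import path for the decorator
from actionweaver.llms import wrap   # assumed helper that lets `create` accept `actions=[...]`

# Wrap the chat completions client so it can dispatch to registered actions.
llm = wrap(OpenAI().chat.completions)

@action(name="ListSources")
def list_sources() -> str:
    """
    List the sources currently ingested into the knowledge base.
    The signature and this docstring are what the LLM sees when choosing a tool.
    """
    return "Sources: []"  # placeholder implementation

response = llm.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "What sources do you have access to?"}],
    actions=[list_sources],
    stream=False,
)
print(response)
```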
-------------------------------------------------------------------------------- /rag/static/styles.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --mongodb-green-500: #00ED64; 3 | --mongodb-green-600: #00D159; 4 | --gray-900: #121826; 5 | --gray-800: #1d2333; 6 | --gray-700: #333c51; 7 | --gray-600: #4b5563; 8 | --gray-400: #8b94a9; 9 | --blue-500: #3b82f6; 10 | --blue-600: #2563eb; 11 | --yellow-400: #facc15; 12 | --purple-400: #c084fc; 13 | --purple-500: #8b5cf6; 14 | } 15 | 16 | /* Scrollbar styling */ 17 | ::-webkit-scrollbar { 18 | width: 8px; 19 | } 20 | ::-webkit-scrollbar-track { 21 | background: var(--gray-900); 22 | } 23 | ::-webkit-scrollbar-thumb { 24 | background: var(--gray-700); 25 | border-radius: 4px; 26 | } 27 | ::-webkit-scrollbar-thumb:hover { 28 | background: var(--gray-400); 29 | } 30 | 31 | @keyframes fadeIn { 32 | from { opacity: 0; } 33 | to { opacity: 1; } 34 | } 35 | @keyframes fadeInUp { 36 | from { opacity: 0; transform: translateY(15px); } 37 | to { opacity: 1; transform: translateY(0); } 38 | } 39 | .animate-fade-in-up { 40 | animation: fadeInUp 0.4s ease-out forwards; 41 | } 42 | 43 | .typing-dot { 44 | width: 8px; 45 | height: 8px; 46 | background-color: var(--gray-400); 47 | border-radius: 50%; 48 | display: inline-block; 49 | margin: 0 2px; 50 | opacity: 0.6; 51 | animation: typing-bubble 1.2s infinite ease-in-out; 52 | } 53 | .typing-dot:nth-child(2) { animation-delay: 0.15s; } 54 | .typing-dot:nth-child(3) { animation-delay: 0.30s; } 55 | @keyframes typing-bubble { 56 | 0%, 80%, 100% { transform: scale(0.6); opacity: 0.5; } 57 | 40% { transform: scale(1); opacity: 1; } 58 | } 59 | 60 | .tooltip-container { position: relative; } 61 | .tooltip-text { 62 | visibility: hidden; 63 | opacity: 0; 64 | width: max-content; 65 | background-color: var(--gray-900); 66 | color: #fff; 67 | text-align: center; 68 | border-radius: 6px; 69 | padding: 5px 10px; 70 | position: absolute; 71 | z-index: 10; 72 | bottom: 125%; 73 | right: 0; 74 | transition: opacity 0.2s, visibility 0.2s; 75 | font-size: 0.8rem; 76 | pointer-events: none; 77 | border: 1px solid var(--gray-600); 78 | } 79 | .tooltip-container:hover .tooltip-text { 80 | visibility: visible; 81 | opacity: 1; 82 | } 83 | 84 | @keyframes spin { 85 | to { transform: rotate(360deg); } 86 | } 87 | .spinner-large { 88 | width: 2.5rem; 89 | height: 2.5rem; 90 | border: 4px solid var(--mongodb-green-500); 91 | border-right-color: transparent; 92 | border-radius: 50%; 93 | animation: spin .8s linear infinite; 94 | } 95 | 96 | /* Magical Buttons */ 97 | .btn { 98 | font-weight: 600; 99 | padding: 0.5rem 1.25rem; 100 | border-radius: 8px; 101 | border: none; 102 | cursor: pointer; 103 | transition: transform 0.2s cubic-bezier(0.34, 1.56, 0.64, 1), box-shadow 0.2s ease, background-position 0.3s ease; 104 | user-select: none; 105 | outline: none; 106 | display: inline-flex; 107 | align-items: center; 108 | justify-content: center; 109 | } 110 | .btn-primary { 111 | background-image: linear-gradient(145deg, var(--mongodb-green-500) 0%, #00c753 100%); 112 | background-size: 200% 100%; 113 | background-position: right bottom; 114 | color: var(--gray-900); 115 | box-shadow: 0 2px 4px rgba(0,0,0,0.2), inset 0 1px 1px rgba(255,255,255,0.4), inset 0 -1px 1px rgba(0,0,0,0.2); 116 | } 117 | .btn-primary:hover:not(:disabled) { 118 | background-position: left bottom; 119 | transform: translateY(-2px); 120 | box-shadow: 0 4px 8px rgba(0,0,0,0.3), inset 0 1px 1px 
rgba(255,255,255,0.4), inset 0 -1px 1px rgba(0,0,0,0.2); 121 | } 122 | .btn-primary:active:not(:disabled) { 123 | transform: translateY(1px); 124 | box-shadow: 0 1px 2px rgba(0,0,0,0.3), inset 0 1px 2px rgba(0,0,0,0.4); 125 | transition-duration: 0.1s; 126 | } 127 | .btn-primary:disabled { 128 | opacity: 0.6; 129 | cursor: not-allowed; 130 | } 131 | .btn-secondary { 132 | background-color: var(--gray-700); 133 | border: 1px solid var(--gray-600); 134 | color: white; 135 | box-shadow: 0 2px 4px rgba(0,0,0,0.2); 136 | } 137 | .btn-secondary:hover:not(:disabled) { 138 | background-color: var(--gray-600); 139 | transform: translateY(-2px); 140 | box-shadow: 0 4-8px rgba(0,0,0,0.3); 141 | } 142 | .btn-secondary:active:not(:disabled) { 143 | transform: translateY(1px); 144 | box-shadow: 0 1px 2px rgba(0,0,0,0.3); 145 | transition-duration: 0.1s; 146 | } 147 | .btn.needs-update { 148 | animation: pulseGreen 1.5s infinite; 149 | } 150 | 151 | /* Chat bubble redesign */ 152 | .message.bot-message .bot-avatar { 153 | align-self: flex-start; 154 | transform: translateY(4px); /* Minor vertical alignment adjustment */ 155 | } 156 | .message.bot-message .bot-content { 157 | border-radius: 0.5rem 1.5rem 1.5rem 1.5rem; 158 | } 159 | .message.user-message .user-content { 160 | background-color: var(--mongodb-green-500); 161 | color: white; 162 | border-radius: 1.5rem 0.5rem 1.5rem 1.5rem; 163 | } 164 | 165 | /* Chat bubble spacing */ 166 | .chat-box > *:not(:first-child) { 167 | margin-top: 1.5rem; 168 | } 169 | .chat-box > .user-message + .bot-message, 170 | .chat-box > .bot-message + .user-message { 171 | margin-top: 2rem; 172 | } 173 | 174 | /* Source link redesign */ 175 | .source-links a { 176 | background-color: var(--gray-600); 177 | color: var(--mongodb-green-500); 178 | padding: 0.25rem 0.75rem; 179 | border-radius: 9999px; 180 | font-weight: 500; 181 | transition: all 0.2s ease; 182 | display: inline-flex; 183 | align-items: center; 184 | gap: 0.25rem; 185 | } 186 | .source-links a:hover { 187 | background-color: var(--mongodb-green-600); 188 | color: var(--gray-900); 189 | transform: translateY(-2px); 190 | } 191 | .source-links a:active { 192 | transform: translateY(0); 193 | } 194 | 195 | /* Chunk Card Styles */ 196 | .chunk-card { 197 | background-color: var(--gray-800); 198 | border: 1px solid var(--gray-700); 199 | border-radius: 8px; 200 | overflow: hidden; 201 | transition: all 0.2s ease; 202 | } 203 | .chunk-card:hover { 204 | border-color: var(--mongodb-green-500); 205 | box-shadow: 0 0 15px rgba(0, 237, 100, 0.1); 206 | } 207 | .chunk-header { 208 | display: flex; 209 | justify-content: space-between; 210 | align-items: center; 211 | padding: 0.5rem 0.75rem; 212 | background-color: var(--gray-700); 213 | border-bottom: 1px solid var(--gray-600); 214 | } 215 | .chunk-title { 216 | font-size: 0.8rem; 217 | font-weight: bold; 218 | color: var(--mongodb-green-500); 219 | overflow: hidden; 220 | text-overflow: ellipsis; 221 | white-space: nowrap; 222 | } 223 | .chunk-content { 224 | padding: 0.75rem; 225 | font-size: 0.9rem; 226 | color: var(--gray-300); 227 | max-height: 150px; 228 | overflow-y: auto; 229 | } 230 | .chunk-list-container { 231 | display: flex; 232 | flex-direction: column; 233 | gap: 0.75rem; 234 | } 235 | 236 | /* Floating Logo Animation */ 237 | @keyframes gentle-float { 238 | 0% { transform: translateY(0); } 239 | 50% { transform: translateY(-8px) rotate(2deg); } 240 | 100% { transform: translateY(0); } 241 | } 242 | .logo-chunk-group { 243 | animation: gentle-float 10s 
ease-in-out infinite; 244 | } 245 | 246 | /* Control Panel Styles */ 247 | .control-panel-section { 248 | background-color: var(--gray-900); 249 | border: 1px solid var(--gray-700); 250 | border-radius: 12px; 251 | padding: 1rem; 252 | } 253 | .control-panel-title { 254 | font-size: 0.9rem; 255 | font-weight: 700; 256 | color: var(--mongodb-green-500); 257 | margin-bottom: 0.75rem; 258 | padding-bottom: 0.5rem; 259 | border-bottom: 1px solid var(--gray-700); 260 | } 261 | 262 | /* Source Browser Styles */ 263 | .sb-column-header { 264 | display: flex; 265 | justify-content: space-between; 266 | align-items: center; 267 | padding: 0 0.25rem 0.75rem 0.25rem; 268 | margin-bottom: 0.5rem; 269 | border-bottom: 1px solid var(--gray-700); 270 | font-size: 0.9rem; 271 | font-weight: 600; 272 | color: var(--gray-300); 273 | flex-shrink: 0; 274 | } 275 | 276 | .sb-count-badge { 277 | font-size: 0.75rem; 278 | font-weight: 500; 279 | background-color: var(--gray-700); 280 | color: var(--gray-300); 281 | padding: 2px 8px; 282 | border-radius: 9999px; 283 | flex-shrink: 0; 284 | margin-left: 0.5rem; 285 | } 286 | 287 | .source-item { 288 | display: flex; 289 | justify-content: space-between; 290 | align-items: center; 291 | width: 100%; 292 | padding: 0.5rem 0.75rem; 293 | border-radius: 6px; 294 | cursor: pointer; 295 | transition: background-color 0.2s, color 0.2s; 296 | text-align: left; 297 | border: none; 298 | background-color: transparent; 299 | color: var(--gray-300); 300 | font-size: 0.9rem; 301 | } 302 | .source-item:hover { 303 | background-color: var(--gray-700); 304 | } 305 | .source-item.active { 306 | background-color: var(--mongodb-green-500); 307 | color: var(--gray-900); 308 | font-weight: bold; 309 | } 310 | .source-item.active .sb-count-badge { 311 | background-color: var(--gray-800); 312 | color: var(--mongodb-green-500); 313 | } 314 | 315 | 316 | /* Style for the file drop zone when dragging a file over it */ 317 | .drop-zone-dragover { 318 | border-color: var(--mongodb-green-500) !important; 319 | background-color: rgba(0, 237, 100, 0.1); 320 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Interactive RAG Agent 2 | 3 | ![](irag-2025.png) 4 | 5 | 6 | ## The RAG Revolution: From Fragmented Mess to Unified Intelligence 7 | 8 | Large Language Models (LLMs) are transforming our world, but they have a fundamental limitation: they only know what they were trained on. To make them truly useful for specific, real-world tasks, we need to ground them in our own data. This is the promise of **Retrieval-Augmented Generation (RAG)**, a technique that gives an LLM access to a relevant knowledge base. 9 | 10 | However, many RAG systems are built on a shaky foundation. They're a fragmented mess of different databases and systems cobbled together, making them brittle, inefficient, and difficult to manage. 11 | 12 | But there's a better way. By combining the art of intelligent document **chunking** with a **unified data architecture**, we can build RAG agents that are not just powerful, but also flexible, manageable, and truly intelligent. This guide will show you how. 13 | 14 | ----- 15 | 16 | ## The Problem: The Frankenstein's Monster of RAG Architectures 17 | 18 | Let's be honest: a typical RAG setup often looks like a digital Frankenstein. 
Your raw documents live in one place, their vector embeddings are stored in a separate vector database, and the metadata that gives them context is tucked away somewhere else entirely. 🧟 19 | 20 | This siloed approach creates a nightmare for anyone trying to build, maintain, or improve the system: 21 | 22 | * **Painful Updates:** How do you update a single piece of information and ensure its vector and metadata are changed everywhere, atomically? 23 | * **Stifled Experimentation:** Want to test a new, better embedding model? Get ready to build an entirely new, parallel system and migrate all your data. 24 | * **Slow, Complex Queries:** Every question requires complex joins across different databases, adding latency and making the system a headache to scale. 25 | 26 | This fragmented architecture simply can’t keep up with the pace of modern AI. 27 | 28 | ----- 29 | 30 | ## The Unified Solution: A Single Source of Truth 31 | 32 | The key to building a smarter RAG system is to create a single source of truth using a flexible document model. Instead of scattering your data across multiple systems, every chunk of your knowledge is stored as a single, self-contained JSON document in a database like MongoDB. 33 | 34 | ```json 35 | { 36 | "_id": ObjectId("..."), 37 | "text": "MongoDB's document model stores data as BSON documents...", 38 | "metadata": { 39 | "source": "https://www.mongodb.com/docs/", 40 | "source_type": "url", 41 | "session_id": "product_faq" 42 | }, 43 | "embedding_openai": [0.123, 0.456, ...], 44 | "embedding_voyageai": [0.789, 0.101, ...] 45 | } 46 | ``` 47 | 48 | This elegant structure immediately solves our biggest problems and unlocks new capabilities: 49 | 50 | * **🧪 Experiment in Minutes, Not Months:** The schema-agnostic model lets you store vectors from multiple embedding models in the *same document*. You can easily A/B test a new model by simply adding a new field—no complex data migration required. 51 | * **🎯 Achieve Pinpoint Accuracy:** You can perform a vector search while simultaneously filtering on any metadata field. This lets you instantly scope a search to a specific user session or document type, dramatically increasing the relevance of your results. 52 | 53 | This unified approach streamlines the entire RAG pipeline, bringing your data, its context, and its vector representations together into one cohesive whole. 54 | 55 | ----- 56 | 57 | ## The Foundation: The Art of Intelligent Chunking 58 | 59 | ![](irag-chunk-mgmt.png) 60 | 61 | With our data model in place, we need to prepare the content. The performance of any RAG system hinges on a well-chunked knowledge base. Breaking a document into pieces sounds simple, but doing it *intelligently* is crucial. 62 | 63 | Using a tool like **LangChain's `RecursiveCharacterTextSplitter`** is a great start. It intelligently breaks down documents by trying to keep paragraphs and sentences whole, which is vital for preserving the semantic meaning of the text. 64 | 65 | You can control this process with two key "tuning knobs": 66 | 67 | * **Chunk Size:** A starting point of **1,000 characters** is a good balance. It's small enough for precise retrieval but large enough to contain meaningful context. 68 | * **Chunk Overlap:** An overlap of **150 characters** creates a contextual bridge between adjacent chunks. This ensures that an important idea isn't awkwardly split in two right at a boundary. 
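To make these two knobs concrete, here is a minimal sketch of the splitting step using LangChain's `RecursiveCharacterTextSplitter` with the values discussed above. The file name and session values are illustrative assumptions; only the splitter and its parameters come from this guide, and the resulting documents mirror the unified schema shown earlier (embedding fields would be added at ingestion time).

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter  # older versions: langchain.text_splitter

# The two tuning knobs discussed above.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # ~1,000 characters per chunk
    chunk_overlap=150,  # contextual bridge between adjacent chunks
)

raw_text = open("policy_doc.txt").read()  # hypothetical source document
chunks = splitter.split_text(raw_text)

# Each chunk becomes one self-contained document, matching the unified schema above.
docs = [
    {
        "text": chunk,
        "metadata": {
            "source": "policy_doc.txt",
            "source_type": "file",
            "session_id": "project_alpha",
        },
    }
    for chunk in chunks
]
print(f"{len(docs)} chunks ready for embedding and insertion")
```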
69 | 70 | ----- 71 | 72 | ## The Conversation: Tuning for Precision 73 | 74 | Once your knowledge is ingested, getting the best answers requires fine-grained control over the retrieval process. Think of it as a conversation with your data, and you have the dials to control the clarity. 75 | 76 | ### The Quality Bouncer: `min_rel_score` 77 | 78 | The **minimum relevance score** acts as a critical quality filter—like a bouncer at a club, it only lets in high-quality information. Vector search ranks results by similarity, assigning a score from 0 to 1. By setting a threshold (e.g., 0.80), you tell the agent to ignore any chunks that aren't a strong match for the query. 79 | 80 | This empowers your agent to confidently say, "I don't know," rather than trying to invent an answer from low-quality context. This is a hallmark of an intelligent system, preventing "garbage-in, garbage-out" scenarios. 81 | 82 | ### The Context Dial: `num_sources` (k) 83 | 84 | The **`num_sources`** parameter (often called 'k') is your context dial. It determines how many of the top-ranking chunks the agent retrieves to answer a question. 85 | 86 | * **For specific, factual questions,** you want a focused beam of light. A small `k` (e.g., 3) is ideal. 87 | * **For open-ended, brainstorming queries,** you need a floodlight. A larger `k` (e.g., 10) provides the broader context necessary for a comprehensive response. 88 | 89 | This simple dial allows you to perfectly balance the need for concise answers with comprehensive ones. 90 | 91 | ----- 92 | 93 | ## The Agent's Edge: A Living, Organized Knowledge Base 94 | 95 | A truly intelligent RAG agent doesn't just *read* its knowledge base—it helps *manage* it. Because each chunk is a unique document with its own `_id`, the agent can perform standard database operations. 96 | 97 | Imagine a user points out that a company policy has changed. The agent can use a tool to execute a command like this: 98 | 99 | `update_chunk(chunk_id='...', new_content='The new policy takes effect on Jan 1, 2026.')` 100 | 101 | This transforms the RAG system from a static library into a **living knowledge base** that can be corrected and updated in real time. 🧠 This crucial capability is often impossible in fragmented RAG applications. 102 | 103 | To manage this evolving knowledge, the agent uses **sessions**—distinct workspaces with their own isolated knowledge and chat history. This ensures that when you're working on "Project Alpha," you're only getting answers from the "Project Alpha" knowledge base, keeping your conversations clean and contextually relevant. 104 | 105 | ----- 106 | 107 | ## Conclusion 108 | 109 | By moving away from fragmented architectures and embracing a unified approach, you can build AI agents that are not only more powerful but also infinitely more manageable. MongoDB’s document model simplifies data management, intelligent chunking enhances retrieval quality, and tunable parameters give you the control to refine results. 110 | 111 | Most importantly, by treating each chunk as a self-contained, editable entity, your knowledge base can grow and evolve. This is the foundation for a truly dynamic and intelligent AI system, ready for the future. 112 | 113 | ----- 114 | 115 | ----- 116 | 117 | ## Appendix: Under the Hood of a Unified RAG System 118 | 119 | ### The Strategic Value of a Single Document 120 | 121 | So, why is keeping everything in one document so revolutionary for RAG? 
Let's revisit the hard questions posed by fragmented systems: 122 | 123 | * **How do you A/B test a new embedding model** without building an entirely new system and migrating all your data? 124 | * **How do you perform a similarity search that's also filtered by user metadata** (like `session_id`) without slow, expensive joins between databases? 125 | * **How do you update or delete a single chunk** and guarantee its vector and metadata are handled atomically? 126 | 127 | The unified document model solves these problems with elegance. A single ingestion process can generate embeddings from multiple models and store them right next to the text and its metadata. 128 | 129 | ```json 130 | { 131 | "text": "The quick brown fox...", 132 | "metadata": { 133 | "source": "example.txt", 134 | "session_id": "project_alpha" 135 | }, 136 | "embedding_openai": [0.01, 0.02, ...], 137 | "embedding_voyageai": [0.98, 0.97, ...] 138 | } 139 | ``` 140 | 141 | This structure provides immense flexibility and future-proofs your architecture. As better models emerge, you can adopt them without disruption. 142 | 143 | ### The Agent's Toolkit: Tools and Pipelines 144 | 145 | The agent interacts with this unified database using **tools**. In a framework like LangChain, a tool is a function the LLM can decide to call based on the user's query. 146 | 147 | Our `search_knowledge_base` tool is powered by a **MongoDB Aggregation Pipeline**, which is where the magic happens. 148 | 149 | ```python 150 | @tool 151 | def search_knowledge_base(query: str, embedding_model: str, num_sources: int = 3) -> str: 152 | """Query the knowledge base to find relevant chunks for `query`.""" 153 | 154 | # Select the correct vector field based on the user's choice 155 | model_config = EMBEDDING_CONFIG[embedding_model] 156 | query_vector = config.embedding_clients[embedding_model].embed_query(query) 157 | 158 | pipeline = [ 159 | # Stage 1: Perform the vector search and metadata filtering in one step 160 | { 161 | "$vectorSearch": { 162 | "index": model_config['index_name'], 163 | "path": model_config['vector_field'], # Dynamically use the right embedding 164 | "queryVector": query_vector, 165 | "limit": num_sources, 166 | "numCandidates": num_sources * 10, 167 | "filter": { 168 | # Only search within the current user's session 169 | "metadata.session_id": {"$eq": config.current_session} 170 | } 171 | } 172 | }, 173 | # Stage 2: Reshape the output for the LLM 174 | { 175 | "$project": { 176 | "score": {"$meta": "vectorSearchScore"}, 177 | "content": "$text", 178 | "source": "$metadata.source" 179 | } 180 | } 181 | ] 182 | results = list(config.collection.aggregate(pipeline)) 183 | return format_results(results) 184 | ``` 185 | 186 | This pipeline is where the power of the unified model becomes clear. The `$vectorSearch` stage efficiently finds the most semantically similar chunks while *simultaneously* applying a `filter` on the metadata. This is a highly optimized, single-database operation that is far more efficient than coordinating searches across separate systems. This architecture paves the way for even more advanced strategies, all within the same powerful pipeline. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /chunk.md: -------------------------------------------------------------------------------- 1 | ## Chunking: A Hidden Hero in the Rise of GenAI 2 | 3 | ## ![Alt text](https://cdn.stackoverflow.co/images/jo7n4k8s/production/ef172115fca9aa6b3b99eeb1c749acf9f8c183a0-6000x3150.png?w=1200&h=630&auto=format&dpr=2) 4 | 5 | The recent boom in Large Language Models (LLMs) has opened up a new world of possibilities for understanding and interacting with language. One of the most exciting applications is their ability to automatically summarize long documents, saving us valuable time and effort. However, effectively summarizing longer documents with LLMs still presents some challenges. This blog post dives into the often-overlooked but crucial role of "chunking" and its potential to unlock the full power of LLMs in document summarization, particularly within the context of the Retrieval-Augmented Generation (RAG) model, **powered by the innovative capabilities of MongoDB Atlas Vector Search.** 6 | 7 | **RAG and the Chunking Puzzle:** 8 | 9 | RAG takes a two-pronged approach to summarization, combining the strengths of both information retrieval and text generation. It first identifies relevant passages within the document based on a query and then uses an LLM to craft a concise and informative summary of those passages. However, the effectiveness of this process hinges heavily on how the document is divided into smaller units, known as "chunks." Chunks that are too large can overwhelm the LLM, leading to inaccurate or incomplete summaries. Conversely, chunks that are too small may not provide enough context for the LLM to understand the overall message of the document. 10 | 11 | **The Quest for the Optimal Chunk:** 12 | 13 | Researchers have been actively exploring various chunking strategies to optimize the performance of RAG. Here are some: 14 | 15 | * **Fixed-size chunks with overlap:** This method involves dividing the document into chunks of a predetermined size, ensuring sufficient context while minimizing information loss at chunk boundaries. By leveraging the $vectorSearch operator, we can now perform efficient Approximate Nearest Neighbor (ANN) searches within each chunk, ensuring we retrieve the most relevant passages for summarization. 
16 | * **Recursive chunking:** This strategy takes an iterative approach, starting with the entire document and then splitting each chunk into smaller and smaller pieces. This allows for fine-grained control over the level of detail and context presented to the LLM. MongoDB Atlas Vector Search's vector representation of document content empowers us to perform hierarchical chunking, efficiently identifying the most relevant sub-topics within each segment.
17 | * **Paragraph-based chunking:** This method utilizes natural paragraph breaks to define chunk boundaries, making it suitable for documents with well-defined paragraphs. However, it may not be ideal for texts with more unstructured content. Here, the $filter capabilities of MongoDB Atlas Vector Search come in handy, allowing us to filter chunks based on specific keywords or semantic similarity to ensure we focus on the most relevant parts of the document.
18 | * **Single-page chunks:** This simple approach uses entire pages as individual chunks. While efficient, it may not capture crucial details or effectively address the limitations of LLM processing capabilities. By leveraging the hybrid search capabilities of MongoDB Atlas Vector Search, we can combine traditional keyword search with vector similarity to achieve optimal chunk retrieval, even for single-page documents.
19 |
20 | **Other Strategies**
21 |
22 | **Parent Document Retrieval Strategies for RAG:**
23 |
24 | The effectiveness of RAG relies heavily on the initial retrieval of relevant passages from the "parent document." Here are some key strategies:
25 |
26 | * **Keyword matching:** This traditional approach involves matching keywords from the query to keywords within the document. While simple and efficient, it may not capture the full semantic meaning of the query or the document.
27 | * **Passage embedding and retrieval:** This strategy uses vector representations of both the query and the document passages. This allows for more precise retrieval based on semantic similarity, even if the exact keywords don't match. MongoDB Atlas Vector Search excels at this, enabling efficient and accurate retrieval of relevant passages using the $vectorSearch operator.
28 | * **Hybrid search:** This approach combines keyword matching with passage embedding and retrieval. This leverages the strengths of both methods, ensuring both high recall (finding all relevant passages) and high precision (finding only relevant passages).
29 |
30 | ```python
31 | agg_pipeline = [{
32 |     "$vectorSearch": {
33 |         "index": 'nested_search_index',
34 |         "path": "text_embedding",
35 |         "queryVector": query_vector,
36 |         "limit": k,
37 |         "numCandidates": k * multiplier,
38 |     },
39 | },
40 |
41 | {
42 |     "$match": {"sample_question": {"$exists": False}}
43 | },
44 | {
45 |     "$project": {"text_embedding": 0}
46 | },
47 | {
48 |     '$lookup': {"from": "hnsw_parent_retrieval_example",
49 |                 "localField": "parent_id",
50 |                 "foreignField": "_id",
51 |                 "as": 'parent_documents'
52 |     }},
53 | {'$unwind': {"path": "$parent_documents"}},
54 | {"$limit": k}
55 | ]
56 | ```
57 |
58 | This aggregation pipeline in MongoDB Atlas Vector Search retrieves relevant documents based on a query vector and performs further filtering and processing. Here's a breakdown of each stage:
59 |
60 | **Stage 1: $vectorSearch:**
61 |
62 | - **index:** Specifies the name of the vector search index used for retrieval.
63 | - **path:** Defines the path within each document where the text embedding vector is stored (assumed to be "text_embedding").
64 | - **queryVector:** The vector representation of the query used for semantic search.
65 | - **limit:** Maximum number of documents to retrieve (k).
66 | - **numCandidates:** Number of candidate documents to consider before filtering (k * multiplier). This helps ensure enough relevant documents are retrieved even after filtering.
67 |
68 | **Stage 2: $match:**
69 |
70 | - **"sample_question": {"$exists": False}:** This filters out documents that have a field named "sample_question", ensuring we only deal with documents relevant to the current task.
71 |
72 | **Stage 3: $project:**
73 |
74 | - **"text_embedding": 0:** Excludes the "text_embedding" field from the output documents, potentially reducing document size and improving efficiency.
75 |
76 | **Stage 4: $lookup:**
77 |
78 | - **"from": "hnsw_parent_retrieval_example":** Specifies the name of the collection containing parent documents.
79 | - **"localField": "parent_id"**: Identifies the field in the current document that stores the parent document ID.
80 | - **"foreignField": "_id"**: Identifies the field in the parent document collection that stores the document ID.
81 | - **"as": 'parent_documents'**: Defines the alias for the retrieved parent documents in the output.
82 |
83 | **Stage 5: $unwind:**
84 |
85 | - **{"path": "$parent_documents"}**: "Unwinds" the "parent_documents" array, creating a separate document for each parent document associated with the current document.
86 |
87 | **Stage 6: $limit:**
88 |
89 | - **"limit": k**: Limits the final output to the k most relevant documents, preserving the relevance ordering established by the $vectorSearch stage.
90 |
91 | Overall, this pipeline uses vector search to retrieve relevant documents based on a query vector, performs further filtering and exclusion, associates each document with its corresponding parent document, and finally returns the k most relevant documents.
92 |
93 | ## Beyond Retrieval: Unlocking Deeper Insights with Question-Driven Chunking and LLM Processing
94 |
95 | Imagine summarizing a news article about a groundbreaking scientific discovery. You've retrieved a relevant chunk, brimming with technical jargon and intricate concepts. To truly grasp the essence of this discovery and prepare the information for LLM-based summarization, a more proactive approach is needed. Here's where **question-driven chunking**, powered by MongoDB Atlas Vector Search, comes into play.
96 |
97 | Instead of passively processing the entire chunk, we can ask a targeted question like: "What are the key implications of this new discovery for the field of medicine?" This simple act transforms the process from passive consumption to active exploration, focusing the LLM's attention on the most relevant information.
98 |
99 | **Leveraging the Power of Vector Search:**
100 |
101 | Through the magic of MongoDB Atlas Vector Search, both the question and the chunk are embedded into a "semantic landscape." This allows us to search for the sentence within the chunk that best aligns with the question's meaning, regardless of exact word matches. This targeted approach unlocks several key benefits:
102 |
103 | * **Enhanced Understanding:** By focusing solely on the relevant answer sentence, the LLM receives the most crucial information, leading to a more accurate and insightful summary.
104 | * **Reduced Workload:** The LLM doesn't have to sift through the entire chunk, minimizing processing time and computational resources.
105 | * **Unveiling Deeper Connections:** Asking questions allows us to uncover hidden insights within the information, generating summaries that go beyond just factual details. 106 | 107 | **The Power of LLM Processing:** 108 | 109 | Once the answer sentence is extracted through MongoDB Atlas Vector Search, the LLM can be used to further refine and summarize the extracted information. This process involves: 110 | 111 | * **Contextualization:** Providing the LLM with additional context, such as the original question, relevant sentences from the chunk, and the desired length and key points for the summary. 112 | * **LLM Processing:** The LLM then leverages its capabilities to extract key information, rephrase the answer sentence for clarity and conciseness, and ultimately generate a concise and informative summary. 113 | * **Integration:** This LLM-generated summary can be integrated into a larger summarization system that combines summaries from multiple chunks, performs fact-checking, and offers different summarization styles for diverse audiences and purposes. 114 | 115 | **A New Frontier for Text Analysis:** 116 | 117 | By combining the power of question-driven chunking with LLM processing, we unlock a new level of sophistication in text analysis and summarization. This approach allows us to: 118 | 119 | * Extract the most relevant and insightful information from complex documents. 120 | * Generate summaries that are not only factually accurate but also tailored to specific needs and goals. 121 | * Open up exciting possibilities for utilizing LLM technology for a wide range of applications. 122 | 123 | This is not just about summarizing text; it's about unlocking deeper understanding and transforming information into meaningful insights. By embracing a question-driven approach and leveraging the power of LLM processing, we open a new chapter in the field of text analysis and summarization, paving the way for a more insightful and impactful future. 124 | 125 | **Beyond Chunking: LLM-powered Enhancements:** 126 | 127 | Several innovative approaches leverage LLMs to further improve chunking effectiveness, all powered by MongoDB Atlas Vector Search: 128 | 129 | * **LLM pre-summarization:** This strategy involves using an LLM to pre-summarize the content of each chunk before feeding it to the main RAG model. This significantly reduces the workload for the LLM and can lead to more accurate summarization. By storing pre-summarized content as vectors within MongoDB Atlas Vector Search, we can further enhance query efficiency and enable efficient retrieval of relevant chunks. 130 | * **Static text generation from structured data:** This technique leverages LLMs to generate a static textual representation of the information within each chunk. This can be particularly useful for summarizing documents containing complex data structures, such as tables or figures. MongoDB Atlas Vector Search allows us to store and search these generated texts alongside the original data, enabling a more comprehensive understanding of the document's content. 131 | * **Exchange boundary chunking:** This method is specifically designed for dialogue transcripts and involves splitting the transcript based on speaker changes. This allows the LLM to capture the flow of conversation and generate more accurate summaries. In conjunction with MongoDB Atlas Vector Search, we can perform speaker identification and topic segmentation, further optimizing chunk retrieval for dialogue-based content. 
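Before moving on to concrete examples, here is a minimal sketch of the question-driven chunking idea described earlier: embed the question and each sentence of a retrieved chunk, then keep the sentence that scores highest. The period-based sentence splitting and the generic `embed` callable are simplifying assumptions for illustration; in practice you would pass your embedding client's `embed_query` method and use a proper sentence splitter.

```python
from math import sqrt

def cosine(a, b):
    # Cosine similarity between two equal-length vectors.
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (sqrt(sum(x * x for x in a)) * sqrt(sum(y * y for y in b)))

def best_answer_sentence(question: str, chunk: str, embed) -> str:
    # Naive sentence split on periods; a real implementation would use a proper splitter.
    sentences = [s.strip() for s in chunk.split(".") if s.strip()]
    question_vector = embed(question)
    # Score every sentence against the question and keep the best match.
    scored = [(cosine(question_vector, embed(sentence)), sentence) for sentence in sentences]
    return max(scored)[1]
```

The winning sentence, together with the original question, is what gets handed to the LLM for the contextualization and summarization steps described above.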
132 | 133 | ## Examples 134 | 135 | **Example 1: Fixed-size chunks with overlap and the $vectorSearch operator:** 136 | 137 | Imagine you're summarizing a 10-page research paper using the RAG model. Utilizing MongoDB Atlas Vector Search, you can: 138 | 139 | 1. **Divide the document into fixed-size chunks**, say 1000 words each, with a 500-word overlap. This ensures sufficient context while minimizing information loss at chunk boundaries. 140 | 2. **Within each chunk, leverage the $vectorSearch operator to perform efficient ANN searches**. This allows you to identify the most relevant sentences within each chunk, based on your query or specific keywords. 141 | 3. **Feed these retrieved sentences to the LLM for summarization**, ensuring that the final summary focuses on the most crucial aspects of the document. 142 | 143 | **Example 2: Recursive chunking and vector representation:** 144 | 145 | Consider summarizing a legal document with complex nested structures. Using MongoDB Atlas Vector Search, you can: 146 | 147 | 1. **Start by dividing the document into its main sections**. 148 | 2. **For each section, utilize the vector representation of its content to identify sub-topics**. 149 | 3. **Recursively apply this process**, further dividing each sub-topic into smaller and more focused segments. 150 | 4. **This hierarchical chunking approach, powered by vector similarity, ensures that the LLM receives relevant and contextually rich information for summarization.** 151 | 152 | **Example 3: Paragraph-based chunking with $filter:** 153 | 154 | You want to summarize an online news article. Utilizing MongoDB Atlas Vector Search, you can: 155 | 156 | 1. **Divide the document into natural paragraph breaks**. 157 | 2. **Apply the $filter operator to filter chunks based on specific keywords** related to your query or area of interest. 158 | 3. **This ensures that the LLM focuses solely on the most relevant sections of the article**, generating a concise and informative summary. 159 | 160 | **Example 4: Single-page chunks and hybrid search:** 161 | 162 | You need to summarize a product manual with minimal text but lots of diagrams and figures. Using MongoDB Atlas Vector Search, you can: 163 | 164 | 1. **Treat each page as a single chunk**. 165 | 2. **Employ hybrid search capabilities**, combining traditional keyword search with vector similarity. 166 | 3. **This allows you to retrieve relevant chunks based on both textual content and visual information embedded within the diagrams and figures**. 167 | 4. **The LLM can then process these retrieved chunks to generate a comprehensive and accurate summary of the entire product manual.** 168 | 169 | **Example 5: LLM pre-summarization and vector storage:** 170 | 171 | Imagine you have a large corpus of scientific articles that you need to summarize regularly. With MongoDB Atlas Vector Search, you can: 172 | 173 | 1. **Pre-summarize each article using an LLM**. 174 | 2. **Store these pre-summarized texts as vectors within MongoDB Atlas Vector Search**. 175 | 3. **This allows for efficient query processing and retrieval of relevant summaries**, significantly reducing the workload on the main LLM. 176 | 4. **When a new query arrives, you can first search for pre-existing summaries based on vector similarity**. This can potentially provide instant results for common queries, saving valuable time and computational resources. 
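As a rough illustration of Example 1's first step, the sketch below builds fixed-size word chunks with overlap. The 1,000-word size and 500-word overlap mirror the numbers in the example; the function itself is illustrative, and the actual ingestion code in this repository splits by characters rather than words.

```python
def fixed_size_chunks(text: str, size: int = 1000, overlap: int = 500) -> list:
    # Fixed-size word chunks with overlap (Example 1, step 1). Illustrative only;
    # the repository's ingestion uses a character-based splitter instead.
    assert size > overlap, "chunk size must be larger than the overlap"
    words = text.split()
    step = size - overlap
    starts = range(0, max(len(words) - overlap, 1), step)
    return [" ".join(words[i:i + size]) for i in starts]

# Each chunk (and later the query) is embedded, and $vectorSearch retrieves the
# best-matching chunks to hand to the LLM (steps 2 and 3 of Example 1).
```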
177 | 178 | These are just a few examples of how MongoDB Atlas Vector Search can be used to enhance the effectiveness of chunking in LLM-based summarization tasks. By leveraging its powerful search and storage capabilities, researchers and developers can unlock the full potential of LLMs and achieve even better performance in document understanding and summarization. 179 | 180 | 181 | **The Future of Chunking: Unlocking the Full Potential of LLMs:** 182 | 183 | Chunking experimentation, empowered by MongoDB Atlas Vector Search, is an exciting field with the potential to revolutionize the way LLMs approach document summarization. By exploring and optimizing different chunking strategies, researchers are paving the way for LLMs to generate informative, accurate, and concise summaries of even the most complex documents. 184 | -------------------------------------------------------------------------------- /rag/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Interactive RAG Agent // MongoDB Edition 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
(Markup omitted: the page titled "Interactive RAG Agent // MongoDB Edition" lays out the chat interface and control panel. Visible UI text includes the "Interactive RAG Agent" header and "MongoDB Themed Data Chunks" label; the welcome message "Welcome! Use the Control Panel on the right to manage sessions, add data, and fine-tune retrieval settings."; the hint "Preview the RAG context for your query before sending"; a Session Management panel ("Each session is a distinct workspace with its own chat history and knowledge base."); a Knowledge Base Tools panel; and a Retrieval Settings panel with a minimum relevance score control ("Minimum relevance score (0.0 to 1.0). Chunks below this are discarded.", default 0.00) and a max chunk length control ("Max characters per retrieved chunk. Truncates longer chunks.", default 2000).)
384 | 385 | 402 | 403 | 446 | 447 | 448 | -------------------------------------------------------------------------------- /blog.md: -------------------------------------------------------------------------------- 1 | # Interactive RAG with MongoDB Atlas + Function Calling API 2 | ## Introduction: Unveiling the Power of Interactive Knowledge Discovery 3 | 4 | Imagine yourself as a detective investigating a complex case. Traditional retrieval-augmented generation (RAG) acts as your static assistant, meticulously sifting through mountains of evidence based on a pre-defined strategy. While helpful, this approach lacks the flexibility needed for today's ever-changing digital landscape. 5 | 6 | Enter interactive RAG – the next generation of information access. It empowers users to become active knowledge investigators by: 7 | 8 | * **Dynamically adjusting retrieval strategies:** Tailor the search to your specific needs by fine-tuning parameters like the number of sources, chunk size, and retrieval algorithms. 9 | * **Staying ahead of the curve:** As new information emerges, readily incorporate it into your retrieval strategy to stay up-to-date and relevant. 10 | * **Enhancing LLM performance:** Optimize the LLM's workload by dynamically adjusting the information flow, leading to faster and more accurate analysis. 11 | 12 | Before you continue, make sure you understand the basics of: 13 | 14 | - [LLMs](https://www.mongodb.com/basics/large-language-models). 15 | - [ RAG](https://www.mongodb.com/basics/retrieval-augmented-generation). 16 | - [Using a vector database](https://www.mongodb.com/basics/vector-databases). 17 | 18 | ## ![Retrieval Augmented Generation - Diagram 1](./images/RAG-chunks.png) 19 | (_image from [Session 7: RAG Evaluation with RAGAS and How to Improve Retrieval](https://www.youtube.com/watch?v=mEv-2Xnb_Wk))_ 20 | 21 | ## Optimizing your retrieval strategy: static vs. interactive RAG 22 | 23 | Choosing between static and interactive retrieval-augmented generation approaches is crucial for optimizing your application's retrieval strategy. Each approach offers unique advantages and disadvantages, tailored to specific use cases: 24 | 25 | **Static RAG:** A static RAG approach is pre-trained on a fixed knowledge base, meaning the information it can access and utilize is predetermined and unchanging. This allows for faster inference times and lower computational costs, making it ideal for applications requiring real-time responses, such as chatbots and virtual assistants. 26 | 27 | **Pros:** 28 | 29 | * **Faster response:** Pre-loaded knowledge bases enable rapid inference, ideal for real-time applications like chatbots and virtual assistants. 30 | * **Lower cost:** Static RAG requires fewer resources for training and maintenance, making it suitable for resource-constrained environments. 31 | * **Controlled content:** Developers have complete control over the model's knowledge base, ensuring targeted and curated responses in sensitive applications. 32 | * **Consistent results:** Static RAG provides stable outputs even when underlying data changes, ensuring reliability in data-intensive scenarios. 33 | 34 | **Cons:** 35 | 36 | * **Limited knowledge:** Static RAG is confined to its pre-loaded knowledge, limiting its versatility compared to interactive RAG accessing external data. 37 | * **Outdated information:** Static knowledge bases can become outdated, leading to inaccurate or irrelevant responses if not frequently updated. 
38 | * **Less adaptable:** Static RAG can struggle to adapt to changing user needs and preferences, limiting its ability to provide personalized or context-aware responses. 39 | 40 | **Interactive RAG:** An interactive RAG approach is trained on a dynamic knowledge base, allowing it to access and process real-time information from external sources such as online databases and APIs. This enables it to provide up-to-date and relevant responses, making it suitable for applications requiring access to constantly changing data. 41 | 42 | **Pros:** 43 | 44 | * **Up-to-date information:** Interactive RAG can access and process real-time external information, ensuring current and relevant responses, which is particularly valuable for applications requiring access to frequently changing data. 45 | * **Greater flexibility:** Interactive RAG can adapt to user needs and preferences by incorporating feedback and interactions into their responses, enabling personalized and context-aware experiences. 46 | * **Vast knowledge base:** Access to external information provides an almost limitless knowledge pool, allowing interactive RAG to address a wider range of queries and deliver comprehensive and informative responses. 47 | 48 | **Cons:** 49 | 50 | * **Slower response:** Processing external information increases inference time, potentially hindering real-time applications. 51 | * **Higher cost:** Interactive RAG requires more computational resources, making it potentially unsuitable for resource-constrained environments. 52 | * **Bias risk:** External information sources may contain biases or inaccuracies, leading to biased or misleading responses if not carefully mitigated. 53 | * **Security concerns:** Accessing external sources introduces potential data security risks, requiring robust security measures to protect sensitive information. 54 | 55 | ### Choosing the right approach 56 | 57 | While this tutorial focuses specifically on interactive RAG, the optimal approach depends on your application's specific needs and constraints. Consider: 58 | 59 | * **Data size and update frequency:** Static models are suitable for static or infrequently changing data, while interactive RAG is necessary for frequently changing data. 60 | * **Real-time requirements:** Choose static RAG for applications requiring fast response times. For less critical applications, interactive RAG may be preferred. 61 | * **Computational resources:** Evaluate your available resources when choosing between static and interactive approaches. 62 | * **Data privacy and security:** Ensure your chosen approach adheres to all relevant data privacy and security regulations. 63 | 64 | 65 | ## Chunking: a hidden hero in the rise of GenAI 66 | 67 | Now, let's put our detective hat back on. If you have a mountain of evidence available for a particular case, you wouldn't try to analyze every piece of evidence at once, right? You'd break it down into smaller, more manageable pieces — documents, witness statements, physical objects — and examine each one carefully. In the world of large language models, this process of breaking down information is called _chunking_, and it plays a crucial role in unlocking the full potential of retrieval-augmented generation. 68 | 69 | Just like a detective, an LLM can't process a mountain of information all at once. Chunking helps it break down text into smaller, more digestible pieces called _chunks_. Think of these chunks as bite-sized pieces of knowledge that the LLM can easily analyze and understand. 
This allows the LLM to focus on specific sections of the text, extract relevant information, and generate more accurate and insightful responses. 70 | 71 | However, the size of each chunk isn't just about convenience for the LLM; it also significantly impacts the _retrieval vector relevance score_, a key metric in evaluating the effectiveness of chunking strategies. The process involves converting text to vectors, measuring the distance between them, utilizing ANN/KNN algorithms, and calculating a score for the generated vectors. 72 | 73 | Here is an example: Imagine asking "What is a mango?" and the LLM dives into its knowledge base, encountering these chunks: 74 | 75 | **High scores:** 76 | 77 | * **Chunk:** "Mango is a tropical stone fruit with a sweet, juicy flesh and a single pit." (Score: 0.98) 78 | * **Chunk:** "In India, mangoes are revered as the 'King of Fruits' and hold cultural significance." (Score: 0.92) 79 | * **Chunk:** "The mango season brings joy and delicious treats like mango lassi and mango ice cream." (Score: 0.85) 80 | 81 | These chunks directly address the question, providing relevant information about the fruit's characteristics, cultural importance, and culinary uses. High scores reflect their direct contribution to answering your query. 82 | 83 | **Low scores:** 84 | 85 | * **Chunk:** "Volcanoes spew molten lava and ash, causing destruction and reshaping landscapes." (Score: 0.21) 86 | * **Chunk:** "The stock market fluctuates wildly, driven by economic factors and investor sentiment." (Score: 0.42) 87 | * **Chunk:** "Mitochondria, the 'powerhouses of the cell,' generate energy for cellular processes." (Score: 0.55) 88 | 89 | These chunks, despite containing interesting information, are completely unrelated to mangoes. They address entirely different topics, earning low scores due to their lack of relevance to the query. 90 | 91 | Check out [ChunkViz v0.1](https://www.chunkviz.com/) to get a feel for how chunk size (character length) breaks down text. 
92 | 93 | ![Chunk Visualization](./images/chunkviz-1.png) 94 | 95 | **Balancing detail and context:** 96 | 97 | The size of each chunk influences the retrieval vector relevance score in distinct ways: 98 | 99 | **Smaller chunk size:** 100 | 101 | * **Pros:** 102 | * Precise focus on specific details and nuances 103 | * Potentially higher relevance scores due to accurate information extraction 104 | * Increased sensitivity to subtle changes in meaning 105 | * **Cons:** 106 | * May sacrifice broader context and understanding of the overall message 107 | * Requires more computational resources to process numerous chunks 108 | * Increased risk of missing relevant information due to limited context 109 | 110 | **Larger chunk size:** 111 | 112 | * **Pros:** 113 | * Provides a richer context for comprehending the overall message 114 | * More efficient processing with fewer chunks to handle 115 | * Potentially higher relevance scores for related chunks due to broader context 116 | * **Cons:** 117 | * May overlook specific details and subtle shifts in meaning 118 | * Increased risk of including irrelevant information within a chunk, potentially lowering the relevance score 119 | 120 | **Examples in action:** 121 | 122 | **Smaller chunk size:** 123 | 124 | * **Example:** Analyzing specific clauses in a legal document to identify potential inconsistencies 125 | * **Benefit:** Increased precision in detecting subtle shifts in meaning and ensuring accurate retrieval of relevant information 126 | 127 | **Larger chunk size:** 128 | 129 | * **Example:** Summarizing a long document by extracting key ideas and information across various sections 130 | * **Benefit:** Improved context for comprehending the overall message and the relationships between different parts of the text 131 | 132 | **Considerations for optimal chunking:** 133 | 134 | Finding the ideal chunk size is a delicate balance between focusing on specific details and capturing the broader context. Several factors influence this: 135 | 136 | * **Task at hand:** For tasks like question-answering, smaller chunks might be preferred for pinpoint accuracy. In contrast, summarization tasks benefit from larger chunks for better context. 137 | * **Data type:** Different types of data might require different chunking approaches. For example, code might be chunked differently than a news article. 138 | * **Desired accuracy:** Smaller chunks can lead to higher precision, while larger chunks might offer better overall understanding. 139 | 140 | **Unlocking the future:** 141 | 142 | Effective chunking maximizes the retrieval vector relevance score, enabling LLMs to generate the most accurate and insightful responses possible. By understanding the impact of chunk size and other relevant factors, we can unleash the full potential of LLMs and unlock exciting opportunities for the future. In this tutorial, the chunk size we will be controlling interactively is the retrieval chunk. 143 | 144 | ## Interactive retrieval-augmented generation 145 | 146 | ## ![RAG Agent Architecture for this Tutorial](./images/rag-agent.png) 147 | 148 | In this tutorial, we will showcase an interactive RAG agent. An agent is a computer program or system designed to perceive its environment, make decisions, and achieve specific goals. 
The interactive RAG agent we will showcase supports the following actions: 149 | - answering questions 150 | - searching the web 151 | - reading web content (URLs) 152 | - listing all sources 153 | - removing sources 154 | - resetting messages 155 | - modifying rag strategy (num_sources, chunk_size, etc.) 156 | 157 | ## Taking control with interactive RAG 158 | 159 | While an optimized chunk size is crucial, interactive RAG goes a step further. It empowers users to dynamically adjust their RAG strategy in real-time, using the function calling API of large language models. This unlocks a new era of personalized information access and knowledge management. 160 | 161 | This interactive RAG tutorial leverages: 162 | 163 | * **Dynamic strategy adjustment:** Unlike traditional RAG approaches, users can fine-tune chunk size, the number of sources, and other parameters on the fly, tailoring the LLM's response to their specific needs. 164 | * **Function calling API integration:** Function calling API seamlessly integrates external tools and services with LLMs. This allows users to seamlessly incorporate their data sources and tools into their RAG workflow. 165 | 166 | **Benefits:** 167 | 168 | * Enhanced information retrieval and knowledge management 169 | * Improved accuracy and relevance of LLM responses 170 | * Flexible and versatile framework for building AI applications 171 | 172 | 173 | ## Ingesting content into your vector database 174 | 175 | ### Streamlining content ingestion with function calling 176 | 177 | While vector databases offer significant advantages for GenAI applications, the process of ingesting content can feel cumbersome. Fortunately, we can harness the power of function calling API to seamlessly add new content to the database, simplifying the workflow and ensuring continuous updates. 178 | 179 | ### Choosing the right home for your embeddings 180 | 181 | While various databases can store vector embeddings, each with unique strengths, [MongoDB Atlas](https://cloud.mongodb.com) stands out for GenAI applications. Imagine MongoDB as a delicious cake you can both bake and eat. Not only does it offer the familiar features of MongoDB, but it also lets you store and perform mathematical operations on your vector embeddings directly within the platform. This eliminates the need for separate tools and streamlines the entire process. 182 | 183 | By leveraging the combined power of function calling API and MongoDB Atlas, you can streamline your content ingestion process and unlock the full potential of vector embeddings for your GenAI applications. 184 | 185 | ![RAG architecture diagram with MongoDB Atlas](./images/mdb_diagram.png) 186 | 187 | ### Detailed breakdown 188 | 189 | 1. **Vector embeddings**: MongoDB Atlas provides the functionality to store vector embeddings at the core of your document. These embeddings are generated by converting text, video, or audio into vectors utilizing models such as [GPT4All](https://gpt4all.io/index.html), [OpenAI](https://openai.com/) or [Hugging Face](https://huggingface.co/). 190 | 191 | ```python 192 | # Chunk Ingest Strategy 193 | self.text_splitter = RecursiveCharacterTextSplitter( 194 | # Set a really small chunk size, just to show. 195 | chunk_size=4000, # THIS CHUNK SIZE IS FIXED - INGEST CHUNK SIZE DOES NOT CHANGE 196 | chunk_overlap=200, # CHUNK OVERLAP IS FIXED 197 | length_function=len, 198 | add_start_index=True, 199 | ) 200 | # load data from webpages using Playwright. 
One document will be created for each webpage 201 | # split the documents using a text splitter to create "chunks" 202 | loader = PlaywrightURLLoader(urls=urls, remove_selectors=["header", "footer"]) 203 | documents = loader.load_and_split(self.text_splitter) 204 | self.index.add_documents( 205 | documents 206 | ) 207 | ``` 208 | 209 | 2. **Vector index**: When employing vector search, it's necessary to [create a search index](https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/). This process entails setting up the vector path, aligning the dimensions with your chosen model, and selecting a vector function for searching the top K-nearest neighbors. 210 | ```python 211 | { 212 | "name": "", 213 | "type": "vectorSearch", 214 | "fields":[ 215 | { 216 | "type": "vector", 217 | "path": , 218 | "numDimensions": , 219 | "similarity": "euclidean | cosine | dotProduct" 220 | }, 221 | ... 222 | ] 223 | } 224 | ``` 225 | 3. **Chunk retrieval**: Once the vector embeddings are indexed, an aggregation pipeline can be created on your embedded vector data to execute queries and retrieve results. This is accomplished using the [$vectorSearch](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage) operator, a new aggregation stage in Atlas. 226 | 227 | ```python 228 | def recall(self, text, n_docs=2, min_rel_score=0.25, chunk_max_length=800,unique=True): 229 | #$vectorSearch 230 | print("recall=>"+str(text)) 231 | response = self.collection.aggregate([ 232 | { 233 | "$vectorSearch": { 234 | "index": "default", 235 | "queryVector": self.gpt4all_embd.embed_query(text), #GPT4AllEmbeddings() 236 | "path": "embedding", 237 | #"filter": {}, 238 | "limit": 15, #Number (of type int only) of documents to return in the results. Value can't exceed the value of numCandidates. 239 | "numCandidates": 50 #Number of nearest neighbors to use during the search. You can't specify a number less than the number of documents to return (limit). 240 | } 241 | }, 242 | { 243 | "$addFields": 244 | { 245 | "score": { 246 | "$meta": "vectorSearchScore" 247 | } 248 | } 249 | }, 250 | { 251 | "$match": { 252 | "score": { 253 | "$gte": min_rel_score 254 | } 255 | } 256 | },{"$project":{"score":1,"_id":0, "source":1, "text":1}}]) 257 | tmp_docs = [] 258 | str_response = [] 259 | for d in response: 260 | if len(tmp_docs) == n_docs: 261 | break 262 | if unique and d["source"] in tmp_docs: 263 | continue 264 | tmp_docs.append(d["source"]) 265 | str_response.append({"URL":d["source"],"content":d["text"][:chunk_max_length],"score":d["score"]}) 266 | kb_output = f"Knowledgebase Results[{len(tmp_docs)}]:\n```{str(str_response)}```\n## \n```SOURCES: "+str(tmp_docs)+"```\n\n" 267 | self.st.write(kb_output) 268 | return str(kb_output) 269 | ``` 270 | 271 | In this tutorial, we will mainly be focusing on the **CHUNK RETRIEVAL** strategy using the function calling API of LLMs and MongoDB Atlas as our **[data platform](https://www.mongodb.com/atlas)**. 272 | 273 | ## Key features of MongoDB Atlas 274 | MongoDB Atlas offers a robust vector search platform with several key features, including: 275 | 276 | 1. **$vectorSearch operator:** 277 | This powerful aggregation pipeline operator allows you to search for documents based on their vector embeddings. You can specify the index to search, the query vector, and the similarity metric to use. [$vectorSearch](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage) provides efficient and scalable search capabilities for vector data. 278 | 279 | 2. 
**Flexible filtering:** 280 | You can combine $vectorSearch with other aggregation pipeline operators like [$match](https://www.mongodb.com/docs/v7.0/reference/operator/aggregation/match/), [$sort](https://www.mongodb.com/docs/v7.0/reference/operator/aggregation/sort/), and [$limit](https://www.mongodb.com/docs/v7.0/reference/operator/aggregation/limit/) to filter and refine your search results. This allows you to find the most relevant documents based on both their vector embeddings and other criteria. 281 | 282 | 3. **Support for various similarity metrics:** 283 | MongoDB Atlas supports different similarity metrics like [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) and [euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance), allowing you to choose the best measure for your specific data and task. 284 | 285 | 4. **High performance:** 286 | The vector search engine in MongoDB Atlas is optimized for large datasets and high query volumes, ensuring efficient and responsive search experiences. 287 | 288 | 5. **Scalability:** 289 | MongoDB Atlas scales seamlessly to meet your growing needs, allowing you to handle increasing data volumes and query workloads effectively. 290 | 291 | **Additionally, MongoDB Atlas offers several features relevant to its platform capabilities:** 292 | 293 | * **Global availability:** 294 | Your data is stored in multiple data centers around the world, ensuring high availability and disaster recovery. 295 | * **Security:** 296 | MongoDB Atlas provides robust security features, including encryption at rest and in transit, access control, and data audit logging. 297 | * **Monitoring and alerting:** 298 | MongoDB Atlas provides comprehensive monitoring and alerting features to help you track your cluster's performance and identify potential issues. 299 | * **Developer tools:** 300 | MongoDB Atlas offers various developer tools and APIs to simplify development and integration with your applications. 301 | 302 | ## OpenAI function calling: 303 | OpenAI's function calling is a powerful capability that enables users to seamlessly interact with OpenAI models, such as GPT-3.5, through programmable commands. This functionality allows developers and enthusiasts to harness the language model's vast knowledge and natural language understanding by incorporating it directly into their applications or scripts. Through function calling, users can make specific requests to the model, providing input parameters and receiving tailored responses. This not only facilitates more precise and targeted interactions but also opens up a world of possibilities for creating dynamic, context-aware applications that leverage the extensive linguistic capabilities of OpenAI's models. Whether for content generation, language translation, or problem-solving, OpenAI function calling offers a flexible and efficient way to integrate cutting-edge language processing into various domains. 304 | 305 | ## Key features of OpenAI function calling: 306 | - Function calling allows you to connect large language models to external tools. 307 | - The [Chat Completions API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) generates JSON that can be used to call functions in your code. 308 | - The latest models have been trained to detect when a function should be called and respond with JSON that adheres to the function signature. 309 | - Building user confirmation flows is recommended before taking actions that impact the world on behalf of users. 
310 | - Function calling can be used to create assistants that answer questions by calling external APIs, convert natural language into API calls, and extract structured data from text. 311 | - The basic sequence of steps for function calling involves calling the model, parsing the JSON response, calling the function with the provided arguments, and summarizing the results back to the user. 312 | - Function calling is supported by specific model versions, including [GPT-4](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and [GPT-3.5-turbo](https://platform.openai.com/docs/models/gpt-3-5). 313 | - Parallel function calling allows multiple function calls to be performed together, reducing round-trips with the API. 314 | - Tokens are used to inject functions into the system message and count against the model's context limit and billing. 315 | 316 | ![Function Calling Diagram - Simple](./images/function_calling.png) 317 | 318 | Read more at ThinhDA. 319 | 320 | ## Function calling API basics: actions 321 | 322 | Actions are functions that an agent can invoke. There are two important design considerations around actions: 323 | 324 | * Giving the agent access to the right actions 325 | * Describing the actions in a way that is most helpful to the agent 326 | 327 | ## Crafting actions for effective agents 328 | 329 | **Actions are the lifeblood of an agent's decision-making.** They define the options available to the agent and shape its interactions with the environment. Consequently, designing effective actions is crucial for building successful agents. 330 | 331 | Two key considerations guide this design process: 332 | 333 | 1. **Access to relevant actions:** Ensure the agent has access to actions necessary to achieve its objectives. Omitting critical actions limits the agent's capabilities and hinders its performance. 334 | 2. **Action description clarity:** Describe actions in a way that is informative and unambiguous for the agent. Vague or incomplete descriptions can lead to misinterpretations and suboptimal decisions. 335 | 336 | By carefully designing actions that are both accessible and well-defined, you equip your agent with the tools and knowledge necessary to navigate its environment and achieve its objectives. 337 | 338 | Further considerations: 339 | 340 | * **Granularity of actions:** Should actions be high-level or low-level? High-level actions offer greater flexibility but require more decision-making, while low-level actions offer more control but limit adaptability. 341 | * **Action preconditions and effects:** Clearly define the conditions under which an action can be taken and its potential consequences. This helps the agent understand the implications of its choices. 342 | 343 | 344 | If you don't give the agent the right actions and describe them in an effective way, you won’t be able to build a working agent. 345 | 346 | ![LangChain Tools Diagram](./images/llm_agent.png) 347 | (_Credit to blog post: [Make Langchain Agent Actually Work With Local LLMs (Vicuna, WizardLM)](https://betterprogramming.pub/make-langchain-agent-actually-works-with-local-llms-vicuna-wizardlm-etc-da42b6b1a97)_) 348 | 349 | An LLM is then called, resulting in either a response to the user or action(s) to be taken. If it is determined that a response is required, then that is passed to the user, and that cycle is finished. If it is determined that an action is required, that action is then taken, and an observation (action result) is made. 
That action and corresponding observation are added back to the prompt (we call this an “agent scratchpad”), and the loop resets — i.e., the LLM is called again (with the updated agent scratchpad). 350 | 351 | ## Getting started 352 | 353 | Clone the demo Github repository. 354 | ```bash 355 | git clone git@github.com:ranfysvalle02/Interactive-RAG.git 356 | ``` 357 | 358 | Create a new Python environment. 359 | ```bash 360 | python3 -m venv env 361 | ``` 362 | 363 | Activate the new Python environment. 364 | ```bash 365 | source env/bin/activate 366 | ``` 367 | 368 | Install the requirements. 369 | ```bash 370 | pip3 install -r requirements.txt 371 | ``` 372 | Set the parameters in [params.py](rag/params.py): 373 | ```bash 374 | # MongoDB 375 | MONGODB_URI = "" 376 | DATABASE_NAME = "genai" 377 | COLLECTION_NAME = "rag" 378 | 379 | # If using OpenAI 380 | OPENAI_API_KEY = "" 381 | 382 | # If using Azure OpenAI 383 | #OPENAI_TYPE = "azure" 384 | #OPENAI_API_VERSION = "2023-10-01-preview" 385 | #OPENAI_AZURE_ENDPOINT = "https://.openai.azure.com/" 386 | #OPENAI_AZURE_DEPLOYMENT = "" 387 | 388 | ``` 389 | Create a Search index with the following definition: 390 | ```JSON 391 | { 392 | "type": "vectorSearch", 393 | "fields": [ 394 | { 395 | "numDimensions": 384, 396 | "path": "embedding", 397 | "similarity": "cosine", 398 | "type": "vector" 399 | } 400 | ] 401 | } 402 | ``` 403 | 404 | Set the environment. 405 | ```bash 406 | export OPENAI_API_KEY= 407 | ``` 408 | 409 | To run the RAG application: 410 | 411 | ```bash 412 | env/bin/streamlit run rag/app.py 413 | ``` 414 | Log information generated by the application will be appended to app.log. 415 | 416 | ## Usage 417 | This bot supports the following actions: answering questions, searching the web, reading URLs, removing sources, listing all sources, viewing messages, and resetting messages. 418 | 419 | It also supports an action called iRAG that lets you dynamically control your agent's RAG strategy. 420 | 421 | Ex: "set RAG config to 3 sources and chunk size 1250" => New RAG config:{'num_sources': 3, 'source_chunk_size': 1250, 'min_rel_score': 0, 'unique': True}. 422 | 423 | If the bot is unable to provide an answer to the question from data stored in the Atlas Vector store and your RAG strategy (number of sources, chunk size, min_rel_score, etc), it will initiate a web search to find relevant information. You can then instruct the bot to read and learn from those results. 424 | 425 | 426 | ## Demo 427 | 428 | Let's start by asking our agent a question — in this case, "What is a mango?" The first thing that will happen is it will try to "recall" any relevant information using vector embedding similarity. It will then formulate a response with the content it "recalled" or will perform a web search. Since our knowledge base is currently empty, we need to add some sources before it can formulate a response. 429 | 430 | ![DEMO - Ask a Question](./images/ask_question.png) 431 | 432 | Since the bot is unable to provide an answer using the content in the vector database, it initiated a Google search to find relevant information. We can now tell it which sources it should "learn." In this case, we'll tell it to learn the first two sources from the search results. 433 | 434 | 435 | 436 | ![DEMO - Add a source](./images/add_sources.png) 437 | 438 | ## Change RAG strategy 439 | 440 | Next, let's modify the RAG strategy! Let's make it only use one source and have it use a small chunk size of 500 characters. 
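Under the hood, that request is just another tool call: the LLM maps the natural-language instruction onto a function that mutates the agent's RAG configuration. Below is a rough sketch of such an action; the `iRAG` name and field names come from the Usage section above, but the defaults and the plain-dict implementation are illustrative assumptions, not the repository's exact code.

```python
# Illustrative sketch of an agent action that updates the RAG strategy.
# Field names mirror the config shown in the Usage section; defaults are placeholders.
RAG_CONFIG = {"num_sources": 2, "source_chunk_size": 1000,
              "min_rel_score": 0.00, "unique": True}

def iRAG(num_sources=None, source_chunk_size=None,
         min_rel_score=None, unique=None) -> str:
    """Update only the fields the user asked to change and report the new config."""
    requested = {"num_sources": num_sources,
                 "source_chunk_size": source_chunk_size,
                 "min_rel_score": min_rel_score,
                 "unique": unique}
    for key, value in requested.items():
        if value is not None:
            RAG_CONFIG[key] = value
    return f"New RAG config: {RAG_CONFIG}"

# e.g. "use one source and a chunk size of 500" becomes:
# iRAG(num_sources=1, source_chunk_size=500)
```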
441 |
442 | ![DEMO - Change RAG strategy part 1](./images/mod_rag.png)
443 |
444 | Notice that though it was able to retrieve a chunk with a fairly high relevance score, it could not generate a response: at 500 characters, the chunk simply did not contain enough relevant content to formulate an answer. Since it could not answer with such a small chunk, it performed a web search on the user's behalf.
445 |
446 | Let's see what happens if we increase the chunk size to 3,000 characters instead of 500.
447 |
448 | ![DEMO - Change RAG strategy part 2](./images/mod_rag-2.png)
449 |
450 | Now, with a larger chunk size, it was able to accurately formulate the response using the knowledge from the vector database!
451 |
452 | ## List all sources
453 |
454 | Let's see what's available in the knowledge base of the agent by asking it, "What sources do you have in your knowledge base?"
455 |
456 | ![DEMO - List all sources](./images/list_sources.png)
457 |
458 | ## Remove a source of information
459 |
460 | If you want to remove a specific resource, you could do something like:
461 | ```
462 | USER: remove source 'https://www.oracle.com' from the knowledge base
463 | ```
464 |
465 | To remove all the sources in the collection, we could do something like:
466 |
467 | ![DEMO - Remove ALL sources](./images/forget.png)
468 |
469 | This demo has provided a glimpse into the inner workings of our AI agent, showcasing its ability to learn and respond to user queries in an interactive manner. We've witnessed how it seamlessly combines its internal knowledge base with real-time web search to deliver comprehensive and accurate information. The potential of this technology is vast, extending far beyond simple question-answering. None of this would be possible without the magic of the function calling API.
470 |
471 | ## Embracing the future of information access with interactive RAG
472 |
473 | This post has explored the exciting potential of interactive retrieval-augmented generation (RAG) with the powerful combination of MongoDB Atlas and the function calling API. We've delved into the crucial roles of chunking, embedding, and the retrieval vector relevance score in optimizing RAG performance, unlocking its true potential for information retrieval and knowledge management.
474 |
475 | Interactive RAG, powered by the combined forces of MongoDB Atlas and the function calling API, represents a significant leap forward in the realm of information retrieval and knowledge management. By enabling dynamic adjustment of the RAG strategy and seamless integration with external tools, it empowers users to harness the full potential of LLMs for a truly interactive and personalized experience.
476 |
477 | Intrigued by the possibilities? Explore the full source code for the interactive RAG application and unleash the power of RAG with MongoDB Atlas and the function calling API in your own projects!
478 |
479 | Together, let's unlock the transformative potential of this potent combination and forge a future where information is effortlessly accessible and knowledge is readily available to all.
480 |
481 | View the [full source code](https://github.com/ranfysvalle02/Interactive-RAG/) for the interactive RAG application using MongoDB Atlas and the function calling API.
482 | 483 | ### Additional MongoDB Resources 484 | 485 | - [RAG with Atlas Vector Search, LangChain, and OpenAI](https://www.mongodb.com/developer/products/atlas/rag-atlas-vector-search-langchain-openai/) 486 | - [Taking RAG to Production with the MongoDB Documentation AI Chatbot](https://www.mongodb.com/developer/products/atlas/taking-rag-to-production-documentation-ai-chatbot/) 487 | - [What is Artificial Intelligence (AI)?](https://www.mongodb.com/basics/what-is-artificial-intelligence) 488 | - [Unlock the Power of Semantic Search with MongoDB Atlas Vector Search](https://www.mongodb.com/basics/semantic-search) 489 | - [Machine Learning in Healthcare: 490 | Real-World Use Cases and What You Need to Get Started](https://www.mongodb.com/basics/machine-learning-healthcare) 491 | - [What is Generative AI? 492 | ](https://www.mongodb.com/basics/generative-ai) 493 | 494 | 495 | 496 | 497 | -------------------------------------------------------------------------------- /rag/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import urllib.parse 4 | import re 5 | import json 6 | import inspect 7 | import time 8 | import uuid 9 | from typing import List, Dict, Any, Optional 10 | import traceback 11 | import tempfile 12 | import io 13 | import concurrent.futures 14 | 15 | # --- 0. DEPENDENCIES --- 16 | # pip install pymongo flask openai python-dotenv flask-cors requests langchain 17 | # pip install langchain-openai langchain-mongodb ddgs docling langchain-voyageai voyageai 18 | # REMEMBER: if previously installed "duckduckgo-search", remove it: 19 | # pip uninstall duckduckgo-search -y 20 | # then "pip install ddgs". 21 | 22 | # --- Core Libraries --- 23 | import pymongo 24 | from flask import Flask, request, jsonify, render_template, render_template_string 25 | from flask_cors import CORS 26 | from pymongo.operations import SearchIndexModel 27 | from pymongo.errors import OperationFailure, ConnectionFailure 28 | from dotenv import load_dotenv 29 | import requests 30 | from bson.objectid import ObjectId 31 | from ddgs import DDGS 32 | from docling.document_converter import DocumentConverter 33 | 34 | # --- LangChain Imports --- 35 | from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings 36 | from langchain_voyageai import VoyageAIEmbeddings 37 | from langchain_mongodb import MongoDBAtlasVectorSearch 38 | from langchain.agents import AgentExecutor, create_openai_tools_agent 39 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 40 | from langchain_core.messages import HumanMessage, AIMessage 41 | from langchain_core.tools import tool 42 | from langchain.text_splitter import RecursiveCharacterTextSplitter 43 | 44 | # --- 1. 
CONFIG / ENV SETUP --- 45 | load_dotenv() 46 | 47 | DATABASE_NAME = "interactive_rag_db" 48 | COLLECTION_NAME = "knowledge_base_sessions" 49 | SESSION_FIELD = "session_id" 50 | 51 | # Embedding configuration for multiple models 52 | EMBEDDING_CONFIG = { 53 | "openai": { 54 | "vector_field": "embedding_openai", 55 | "index_name": "openai_vector_index", 56 | "dimensions": 1536 57 | }, 58 | "voyageai": { 59 | "vector_field": "embedding_voyageai", 60 | "index_name": "voyageai_vector_index", 61 | "dimensions": 1024 62 | } 63 | } 64 | 65 | # Logging setup 66 | logging.basicConfig( 67 | filename="rag_agent.log", 68 | filemode="a", 69 | format="%(asctime)s - %(levelname)s - %(message)s", 70 | level=logging.INFO 71 | ) 72 | logger = logging.getLogger(__name__) 73 | 74 | def print_log(message: str): 75 | print(message) 76 | logger.info(message) 77 | 78 | 79 | # --- 2. GLOBAL AGENT STATE --- 80 | class AgentConfig: 81 | def __init__(self): 82 | self.rag_config = { 83 | "num_sources": 3, 84 | "min_rel_score": 0.0, 85 | "max_chunk_length": 2000 86 | } 87 | self.embedding_clients = {} 88 | 89 | # Connect to MongoDB 90 | try: 91 | self.db_client = pymongo.MongoClient( 92 | os.getenv("MDB_URI"), 93 | serverSelectionTimeoutMS=10000 94 | ) 95 | self.db_client.admin.command('ping') 96 | print_log("[INFO] MongoDB connection successful.") 97 | except (ConnectionFailure, OperationFailure) as e: 98 | print_log(f"[FATAL] 🚨 MongoDB connection failed. Error: {e}") 99 | raise 100 | 101 | self.db = self.db_client[DATABASE_NAME] 102 | self.collection = self.db[COLLECTION_NAME] 103 | 104 | # In-memory chat logs 105 | self.chat_history = {} 106 | self.current_session = "default" 107 | 108 | # For storing last search result sources (optional) 109 | self.last_retrieved_sources = [] 110 | 111 | # Initialize embeddings 112 | print_log("--- 🧠 Initializing Embedding Clients ---") 113 | # 1) OpenAI 114 | self.embedding_clients["openai"] = AzureOpenAIEmbeddings( 115 | azure_deployment=os.getenv("EMBEDDING_DEPLOYMENT_NAME", "text-embedding-ada-002"), 116 | azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), 117 | api_key=os.getenv("AZURE_OPENAI_API_KEY") 118 | ) 119 | print_log("[INFO] OpenAI embedding client initialized.") 120 | 121 | # 2) VoyageAI (if VOYAGE_API_KEY is set) 122 | if os.getenv("VOYAGE_API_KEY"): 123 | try: 124 | self.embedding_clients["voyageai"] = VoyageAIEmbeddings( 125 | model="voyage-2", 126 | voyage_api_key=os.getenv("VOYAGE_API_KEY") 127 | ) 128 | print_log("[INFO] VoyageAI embedding client initialized.") 129 | except Exception as e: 130 | print_log(f"[WARN] ⚠️ VoyageAI initialization failed: {e}. Skipping.") 131 | else: 132 | print_log("[INFO] VOYAGE_API_KEY not found. VoyageAI embeddings not available.") 133 | 134 | print_log("------------------------------------") 135 | 136 | config = AgentConfig() 137 | 138 | 139 | # --- 3. 
BACKGROUND TASK SETUP --- 140 | executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) 141 | tasks = {} 142 | 143 | def run_ingestion_task( 144 | task_id: str, 145 | content: str, 146 | source: str, 147 | source_type: str, 148 | session_id: str, 149 | chunk_size: int, 150 | chunk_overlap: int 151 | ): 152 | """Handles chunking & embedding in a background thread.""" 153 | try: 154 | tasks[task_id] = {"status": "processing", "step": "Chunking content..."} 155 | print_log(f"[Task {task_id}] Chunking '{source}' with size {chunk_size} and overlap {chunk_overlap}...") 156 | 157 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 158 | chunks = text_splitter.split_text(content) 159 | if not chunks: 160 | raise ValueError("Could not split content into any chunks.") 161 | 162 | tasks[task_id] = {"status": "processing", "step": "Generating embeddings..."} 163 | print_log(f"[Task {task_id}] Generating embeddings for {len(chunks)} chunks...") 164 | all_embeddings = _embed_chunks_parallel(chunks) 165 | 166 | tasks[task_id] = {"status": "processing", "step": "Saving to knowledge base..."} 167 | print_log(f"[Task {task_id}] Saving {len(chunks)} chunks to the database...") 168 | 169 | to_insert = [] 170 | for i, chunk_text in enumerate(chunks): 171 | doc = { 172 | "text": chunk_text, 173 | "metadata": { 174 | "source": source, 175 | "source_type": source_type, 176 | SESSION_FIELD: session_id, 177 | "chunk_index": i 178 | } 179 | } 180 | for model_name, emb_list in all_embeddings.items(): 181 | if emb_list: 182 | vec_field = EMBEDDING_CONFIG[model_name]["vector_field"] 183 | doc[vec_field] = emb_list[i] 184 | to_insert.append(doc) 185 | 186 | config.collection.insert_many(to_insert) 187 | final_message = f"Successfully ingested {len(chunks)} chunks from source '{source}'." 188 | tasks[task_id] = {"status": "complete", "message": final_message} 189 | print_log(f"[Task {task_id}] {final_message}") 190 | 191 | except Exception as e: 192 | error_message = f"Ingestion failed: {str(e)}" 193 | print_log(f"[Task {task_id}] [ERROR] {error_message}\n{traceback.format_exc()}") 194 | tasks[task_id] = {"status": "failed", "message": error_message} 195 | 196 | 197 | # --- 4. LANGCHAIN SETUP --- 198 | CHAT_DEPLOYMENT_NAME = os.getenv("CHAT_DEPLOYMENT_NAME", "gpt-4o") 199 | print_log(f"--- 🧠 Initializing LLM ---\nChat Deployment: '{CHAT_DEPLOYMENT_NAME}'\n------------------------------------") 200 | 201 | llm = AzureChatOpenAI( 202 | azure_deployment=CHAT_DEPLOYMENT_NAME, 203 | api_version="2024-02-01", 204 | azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), 205 | api_key=os.getenv("AZURE_OPENAI_API_KEY"), 206 | temperature=0 207 | ) 208 | 209 | 210 | # --- 5. 
CORE FUNCTIONS --- 211 | def _embed_chunks_parallel(chunks: List[str]) -> Dict[str, List[List[float]]]: 212 | """Embed the text chunks in parallel for each available embedding model.""" 213 | embeddings = {} 214 | with concurrent.futures.ThreadPoolExecutor() as inner_executor: 215 | future_to_model = { 216 | inner_executor.submit(client.embed_documents, chunks): model_name 217 | for model_name, client in config.embedding_clients.items() 218 | } 219 | for future in concurrent.futures.as_completed(future_to_model): 220 | model_name = future_to_model[future] 221 | try: 222 | embeddings[model_name] = future.result() 223 | print_log(f"[INFO] Generated {len(chunks)} embeddings with {model_name}.") 224 | except Exception as e: 225 | print_log(f"[ERROR] Embedding with {model_name} failed: {e}") 226 | embeddings[model_name] = None 227 | return embeddings 228 | 229 | def _update_chunk_in_db(chunk_id: str, new_content: str) -> Dict[str, Any]: 230 | """Update chunk text and re-embed with all available models.""" 231 | oid = ObjectId(chunk_id) 232 | update_payload = {"$set": {"text": new_content}} 233 | 234 | print_log(f"[INFO] Re-embedding chunk {chunk_id} with all available models...") 235 | all_embeddings = _embed_chunks_parallel([new_content]) 236 | for model_name, embeddings_list in all_embeddings.items(): 237 | if embeddings_list: 238 | vector_field = EMBEDDING_CONFIG[model_name]["vector_field"] 239 | update_payload["$set"][vector_field] = embeddings_list[0] 240 | 241 | result = config.collection.update_one({"_id": oid}, update_payload) 242 | if result.matched_count == 0: 243 | raise ValueError(f"Could not find chunk with ID '{chunk_id}'.") 244 | 245 | return {"status": "success", "message": f"Chunk '{chunk_id}' updated (re-embedded)."} 246 | 247 | def _delete_chunk_from_db(chunk_id: str) -> Dict[str, Any]: 248 | """Delete a single chunk by ID.""" 249 | result = config.collection.delete_one({"_id": ObjectId(chunk_id)}) 250 | if result.deleted_count == 0: 251 | raise ValueError(f"Could not find chunk '{chunk_id}' to delete.") 252 | return {"status": "success", "message": f"Chunk '{chunk_id}' deleted."} 253 | 254 | def _perform_vector_search( 255 | query: str, 256 | session_id: str, 257 | embedding_model: str, 258 | num_sources: int 259 | ) -> List[Dict]: 260 | """Perform a vector-based search in MongoDB for top `num_sources` results.""" 261 | if embedding_model not in config.embedding_clients: 262 | raise ValueError(f"Embedding model '{embedding_model}' is not available.") 263 | 264 | model_config = EMBEDDING_CONFIG[embedding_model] 265 | embedding_client = config.embedding_clients[embedding_model] 266 | query_vector = embedding_client.embed_query(query) 267 | 268 | pipeline = [ 269 | { 270 | "$vectorSearch": { 271 | "index": model_config['index_name'], 272 | "path": model_config['vector_field'], 273 | "queryVector": query_vector, 274 | "numCandidates": num_sources * 10, 275 | "limit": num_sources, 276 | "filter": { 277 | f"metadata.{SESSION_FIELD}": {"$eq": session_id} 278 | } 279 | } 280 | }, 281 | { 282 | "$project": { 283 | "_id": 0, 284 | "content": "$text", 285 | "source": "$metadata.source", 286 | "score": {"$meta": "vectorSearchScore"} 287 | } 288 | } 289 | ] 290 | 291 | return list(config.collection.aggregate(pipeline)) 292 | 293 | 294 | # --- 6. 
AGENT TOOLS --- 295 | @tool 296 | def search_knowledge_base(query: str, embedding_model: str, num_sources: int = 3, max_chunk_length: int = 2000) -> str: 297 | """Query the knowledge base to find relevant chunks for `query`.""" 298 | try: 299 | print_log(f"[INFO] Searching with '{embedding_model}' → top {num_sources}") 300 | results_with_scores = _perform_vector_search(query, config.current_session, embedding_model, num_sources) 301 | 302 | if not results_with_scores: 303 | config.last_retrieved_sources = [] 304 | return f"No relevant info found in session '{config.current_session}'." 305 | 306 | # Remember sources 307 | found_sources = [r.get("source", "N/A") for r in results_with_scores] 308 | config.last_retrieved_sources = list(set(found_sources)) 309 | 310 | # Build a context string 311 | context_parts = [] 312 | for r in results_with_scores: 313 | text = r.get("content", "") 314 | src = r.get("source", "N/A") 315 | score = r.get("score", 0.0) 316 | if max_chunk_length and len(text) > max_chunk_length: 317 | text = text[:max_chunk_length] + "... [truncated]" 318 | context_parts.append(f"Source: {src} (Score: {score:.4f})\nContent: {text}") 319 | 320 | context = "\n---\n".join(context_parts) 321 | return f"Retrieved from '{embedding_model}':\n{context}" 322 | 323 | except Exception as e: 324 | config.last_retrieved_sources = [] 325 | print_log(f"[ERROR] search_knowledge_base: {e}") 326 | return f"❌ Search error: {e}" 327 | 328 | @tool 329 | def read_url(url: str, chunk_size: int=1000, chunk_overlap: int=150) -> str: 330 | """Adds a URL's content (via r.jina.ai) into the knowledge base.""" 331 | try: 332 | if config.collection.find_one({"metadata.source": url, f"metadata.{SESSION_FIELD}": config.current_session}): 333 | return f"❌ Source '{url}' already exists in session '{config.current_session}'." 334 | 335 | jina_key = os.getenv("JINA_API_KEY") 336 | if not jina_key: 337 | return "❌ JINA_API_KEY not set." 338 | 339 | headers = {"Authorization": f"Bearer {jina_key}", "Accept": "application/json"} 340 | print_log(f"[INFO] Reading & ingesting URL: {url}") 341 | 342 | resp = requests.get(f"https://r.jina.ai/{url}", headers=headers, timeout=30) 343 | resp.raise_for_status() 344 | page_content = resp.json().get("data", {}).get("content", "") 345 | if not page_content: 346 | return f"❌ No meaningful content from {url}." 347 | 348 | splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 349 | chunks = splitter.split_text(page_content) 350 | if not chunks: 351 | return "❌ Could not split content into chunks." 352 | 353 | all_embeddings = _embed_chunks_parallel(chunks) 354 | docs_to_insert = [] 355 | for i, ctext in enumerate(chunks): 356 | doc = { 357 | "text": ctext, 358 | "metadata": { 359 | "source": url, 360 | "source_type": "url", 361 | SESSION_FIELD: config.current_session, 362 | "chunk_index": i 363 | } 364 | } 365 | for model_name, embed_list in all_embeddings.items(): 366 | if embed_list: 367 | vector_field = EMBEDDING_CONFIG[model_name]["vector_field"] 368 | doc[vector_field] = embed_list[i] 369 | docs_to_insert.append(doc) 370 | 371 | config.collection.insert_many(docs_to_insert) 372 | return f"✅ Ingested {len(chunks)} chunks from {url} into '{config.current_session}'." 
373 | 
374 |     except Exception as e:
375 |         print_log(f"[ERROR] read_url: {e}\n{traceback.format_exc()}")
376 |         return f"❌ Ingestion error: {e}"
377 | 
378 | @tool
379 | def update_chunk(chunk_id: str, new_content: str) -> str:
380 |     """Updates chunk text (and embeddings) by chunk ID."""
381 |     try:
382 |         res = _update_chunk_in_db(chunk_id, new_content)
383 |         return f"✅ {res['message']}"
384 |     except Exception as e:
385 |         return f"❌ Failed to update chunk: {e}"
386 | 
387 | @tool
388 | def delete_chunk(chunk_id: str) -> str:
389 |     """Deletes a chunk from the knowledge base by ID."""
390 |     try:
391 |         res = _delete_chunk_from_db(chunk_id)
392 |         return f"✅ {res['message']}"
393 |     except Exception as e:
394 |         return f"❌ Failed to delete chunk: {e}"
395 | 
396 | @tool
397 | def switch_session(session_id: str) -> str:
398 |     """Switch to another session in memory."""
399 |     config.current_session = session_id
400 |     if session_id not in config.chat_history:
401 |         config.chat_history[session_id] = []
402 |     return f"✅ Switched to session: **{session_id}**."
403 | 
404 | @tool
405 | def create_session(session_id: str) -> str:
406 |     """Create a new session in memory only (no marker doc)."""
407 |     existing_sessions = config.collection.distinct(f"metadata.{SESSION_FIELD}")
408 |     if session_id in existing_sessions:
409 |         return f"❌ Session **'{session_id}'** already exists."
410 | 
411 |     config.current_session = session_id
412 |     if session_id not in config.chat_history:
413 |         config.chat_history[session_id] = []
414 |     return f"✅ Created and switched to new session: **{session_id}**."
415 | 
416 | @tool
417 | def list_sources() -> str:
418 |     """List all sources in the current session."""
419 |     sources = config.collection.distinct("metadata.source", {f"metadata.{SESSION_FIELD}": config.current_session})
420 |     if not sources:
421 |         return f"No sources found in session '{config.current_session}'."
422 |     return "Sources:\n" + "\n".join(f"- {s}" for s in sources)
423 | 
424 | @tool
425 | def remove_all_sources() -> str:
426 |     """Remove all docs from the current session."""
427 |     r = config.collection.delete_many({f"metadata.{SESSION_FIELD}": config.current_session})
428 |     return f"🗑 Removed all docs from session '{config.current_session}' (deleted {r.deleted_count})."
429 | 
430 | # --- 7. AGENT PROMPT + EXECUTOR ---
431 | tools = [
432 |     search_knowledge_base,
433 |     switch_session,
434 |     create_session,
435 |     list_sources,
436 |     remove_all_sources,
437 |     update_chunk,
438 |     delete_chunk,
439 |     read_url
440 | ]
441 | 
442 | available_model_keys = list(config.embedding_clients.keys())
443 | AGENT_SYSTEM_PROMPT = (
444 |     "You are an AI assistant designed to answer questions using a private knowledge base. "
445 |     "Your primary directive is to **ALWAYS use the `search_knowledge_base` tool** to find relevant information before answering any user query. "
446 |     "**Do not answer from your general knowledge.** Your answers must be based *only* on the context provided by the `search_knowledge_base` tool. "
447 |     "If the tool returns no relevant information or the context is insufficient, you MUST state that you could not find an answer in the knowledge base. "
448 |     f"The available `embedding_model` options for the search tool are: {', '.join(available_model_keys)}. "
449 |     "For other tasks like managing sessions or sources, use the appropriate tool."
450 | )
451 | 
452 | 
453 | prompt = ChatPromptTemplate.from_messages([
454 |     ("system", AGENT_SYSTEM_PROMPT),
455 |     MessagesPlaceholder(variable_name="chat_history"),
456 |     ("human", "{input}"),
457 |     MessagesPlaceholder(variable_name="agent_scratchpad")
458 | ])
459 | 
460 | agent = create_openai_tools_agent(llm, tools, prompt)
461 | agent_executor = AgentExecutor(
462 |     agent=agent,
463 |     tools=tools,
464 |     verbose=True,
465 |     return_intermediate_steps=True
466 | )
467 | 
468 | 
469 | # --- 8. FLASK APP ---
470 | app = Flask(__name__, template_folder="templates", static_folder="static")
471 | CORS(app)
472 | 
473 | @app.route("/")
474 | def index():
475 |     return render_template("index.html")
476 | 
477 | # ---- Ingestion Endpoints ----
478 | @app.route("/ingest", methods=["POST"])
479 | def start_ingestion_task():
480 |     data = request.json
481 |     content = data.get("content")
482 |     source = data.get("source")
483 |     source_type = data.get("source_type", "unknown")
484 |     session_id = data.get("session_id")
485 |     chunk_size = data.get("chunk_size", 1000)
486 |     chunk_overlap = data.get("chunk_overlap", 150)
487 | 
488 |     if not all([content, source, session_id]):
489 |         return jsonify({"error": "Missing required fields."}), 400
490 | 
491 |     # Check duplicates
492 |     if config.collection.count_documents(
493 |         {"metadata.source": source, f"metadata.{SESSION_FIELD}": session_id},
494 |         limit=1
495 |     ) > 0:
496 |         return jsonify({"error": f"Source '{source}' already exists in session '{session_id}'."}), 409
497 | 
498 |     task_id = str(uuid.uuid4())
499 |     tasks[task_id] = {"status": "pending"}
500 | 
501 |     executor.submit(
502 |         run_ingestion_task,
503 |         task_id,
504 |         content,
505 |         source,
506 |         source_type,
507 |         session_id,
508 |         chunk_size,
509 |         chunk_overlap
510 |     )
511 | 
512 |     return jsonify({"task_id": task_id}), 202
513 | 
514 | @app.route("/ingest/status/<task_id>", methods=["GET"])
515 | def get_ingestion_status(task_id):
516 |     if task_id not in tasks:
517 |         return jsonify({"status": "not_found"}), 200
518 |     return jsonify(tasks[task_id]), 200
519 | 
520 | # ---- Chat Endpoint ----
521 | @app.route("/chat", methods=["POST"])
522 | def chat():
523 |     data = request.json
524 |     user_input = data.get("query")
525 |     session_id = data.get("session_id")
526 |     embedding_model = data.get("embedding_model", "openai")
527 |     rag_params = data.get("rag_params", {})
528 |     num_sources = rag_params.get("num_sources", config.rag_config["num_sources"])
529 |     max_chunk_length = rag_params.get("max_chunk_length", config.rag_config["max_chunk_length"])
530 | 
531 |     if not user_input or not session_id:
532 |         return jsonify({"error": "Missing 'query' or 'session_id'"}), 400
533 | 
534 |     print_log(f"\n--- Turn for session '{session_id}' ---\n")
535 |     original_session = config.current_session
536 | 
537 |     try:
538 |         # Switch session in memory
539 |         config.current_session = session_id
540 | 
541 |         # Initialize chat in memory if needed
542 |         if session_id not in config.chat_history:
543 |             config.chat_history[session_id] = []
544 | 
545 |         # Shorten chat history if too long
546 |         current_chat_history = config.chat_history[session_id]
547 |         if len(current_chat_history) > 10:
548 |             current_chat_history = current_chat_history[-10:]
549 | 
550 |         agent_input_string = (
551 |             f"User query: '{user_input}'.\n\n"
552 |             f"IMPORTANT INSTRUCTION: When you call the 'search_knowledge_base' tool, "
553 |             f"you MUST set the 'embedding_model' parameter to '{embedding_model}'."
554 | ) 555 | 556 | # Agent call 557 | response = agent_executor.invoke({ 558 | "input": agent_input_string, 559 | "chat_history": current_chat_history, 560 | "num_sources": num_sources, 561 | "max_chunk_length": max_chunk_length 562 | }) 563 | 564 | # Record the conversation 565 | current_chat_history.extend([ 566 | HumanMessage(content=user_input), 567 | AIMessage(content=response["output"]) 568 | ]) 569 | config.chat_history[session_id] = current_chat_history 570 | 571 | sources_used = config.last_retrieved_sources 572 | 573 | messages = [{ 574 | "type": "bot-message", 575 | "content": response["output"], 576 | "sources": sources_used 577 | }] 578 | 579 | db_sessions = set(config.collection.distinct(f"metadata.{SESSION_FIELD}") or ["default"]) 580 | mem_sessions = set(config.chat_history.keys()) 581 | all_sessions = db_sessions.union(mem_sessions) 582 | 583 | resp_data = { 584 | "messages": messages, 585 | "session_update": { 586 | "all_sessions": sorted(list(all_sessions)), 587 | "current_session": config.current_session 588 | } 589 | } 590 | return jsonify(resp_data) 591 | 592 | except Exception as e: 593 | print_log(f"[ERROR] chat endpoint: {e}\n{traceback.format_exc()}") 594 | return jsonify({"error": str(e)}), 500 595 | 596 | finally: 597 | pass 598 | 599 | # ---- Session / State Endpoints ---- 600 | @app.route("/state", methods=["GET"]) 601 | def get_state(): 602 | db_sessions = set(config.collection.distinct(f"metadata.{SESSION_FIELD}") or ["default"]) 603 | mem_sessions = set(config.chat_history.keys()) or {"default"} 604 | all_sessions = db_sessions.union(mem_sessions) 605 | 606 | return jsonify({ 607 | "all_sessions": sorted(list(all_sessions)), 608 | "current_session": config.current_session, 609 | "available_embedding_models": list(config.embedding_clients.keys()) 610 | }) 611 | 612 | @app.route("/history/clear", methods=["POST"]) 613 | def clear_history(): 614 | data = request.json 615 | session_id = data.get("session_id") 616 | if not session_id: 617 | return jsonify({"error": "Missing 'session_id'"}), 400 618 | 619 | if session_id in config.chat_history: 620 | config.chat_history[session_id] = [] 621 | msg = f"Chat history for '{session_id}' cleared." 
622 |         print_log("[INFO] " + msg)
623 |         return jsonify({"status": "success", "message": msg})
624 | 
625 |     return jsonify({"status": "not_found", "message": f"Session '{session_id}' not found."}), 404
626 | 
627 | # ---- Searching / Preview Endpoints ----
628 | @app.route("/preview_search", methods=["POST"])
629 | def preview_search():
630 |     data = request.json
631 |     query = data.get("query")
632 |     session_id = data.get("session_id")
633 |     embedding_model = data.get("embedding_model", "openai")
634 |     num_sources = data.get("num_sources", 3)
635 | 
636 |     if not query or not session_id:
637 |         return jsonify({"error": "Missing required fields"}), 400
638 | 
639 |     try:
640 |         results = _perform_vector_search(query, session_id, embedding_model, num_sources)
641 |         return jsonify(results)
642 |     except Exception as e:
643 |         print_log(f"[ERROR] preview_search: {e}")
644 |         return jsonify({"error": str(e)}), 500
645 | 
646 | @app.route("/preview_file", methods=["POST"])
647 | def preview_file():
648 |     if "file" not in request.files:
649 |         return jsonify({"error": "No file part"}), 400
650 | 
651 |     file = request.files["file"]
652 |     if file.filename == "":
653 |         return jsonify({"error": "No selected file"}), 400
654 | 
655 |     _, extension = os.path.splitext(file.filename.lower())
656 |     MAX_PREVIEW = 50000
657 | 
658 |     if extension in [".txt", ".md"]:
659 |         text_data = file.read().decode("utf-8", errors="replace")
660 |         if len(text_data) > MAX_PREVIEW:
661 |             text_data = text_data[:MAX_PREVIEW] + "\n\n[TRUNCATED]"
662 |         return jsonify({"content": text_data, "filename": file.filename})
663 | 
664 |     temp_file_path = ""
665 |     try:
666 |         with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp:
667 |             file.save(tmp.name)
668 |             temp_file_path = tmp.name
669 | 
670 |         converter = DocumentConverter()
671 |         result = converter.convert(temp_file_path)
672 |         doc_text = result.document.export_to_markdown()
673 |         if len(doc_text) > MAX_PREVIEW:
674 |             doc_text = doc_text[:MAX_PREVIEW] + "\n\n[TRUNCATED]"
675 | 
676 |         return jsonify({
677 |             "content": doc_text,
678 |             "filename": file.filename
679 |         })
680 |     finally:
681 |         if temp_file_path and os.path.exists(temp_file_path):
682 |             os.unlink(temp_file_path)
683 | 
684 | @app.route("/preview_url", methods=["GET"])
685 | def preview_url():
686 |     url = request.args.get("url")
687 |     if not url:
688 |         return jsonify({"error": "URL parameter is required."}), 400
689 | 
690 |     jina_key = os.getenv("JINA_API_KEY")
691 |     if not jina_key:
692 |         return jsonify({"error": "JINA_API_KEY not set."}), 500
693 | 
694 |     headers = {"Authorization": f"Bearer {jina_key}", "Accept": "application/json"}
695 |     try:
696 |         print_log(f"[INFO] Previewing URL: {url}")
697 |         resp = requests.get(f"https://r.jina.ai/{url}", headers=headers, timeout=30)
698 |         resp.raise_for_status()
699 |         page_content = resp.json().get("data", {}).get("content", "")
700 |         MAX_PREVIEW = 50000
701 |         if len(page_content) > MAX_PREVIEW:
702 |             page_content = page_content[:MAX_PREVIEW] + "\n\n[TRUNCATED]"
703 |         return jsonify({"markdown": page_content})
704 |     except requests.exceptions.RequestException as e:
705 |         return jsonify({"error": f"Error fetching URL content: {e}"}), 500
706 |     except Exception as e:
707 |         return jsonify({"error": f"Unexpected error: {e}"}), 500
708 | 
709 | # ---- Chunk Editing ----
710 | @app.route("/chunk/<chunk_id>", methods=["DELETE"])
711 | def api_delete_chunk(chunk_id):
712 |     try:
713 |         return jsonify(_delete_chunk_from_db(chunk_id))
714 |     except Exception as e:
715 |         return jsonify({"error": str(e)}), 500
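# Example client calls for the two chunk-editing endpoints (a rough sketch, assuming the
# default local server started in __main__ below at http://127.0.0.1:5001 and a chunk_id
# taken from the /chunks listing endpoint further down):
#
#   import requests
#   BASE = "http://127.0.0.1:5001"
#   requests.put(f"{BASE}/chunk/{chunk_id}", json={"content": "corrected chunk text"})  # re-embeds the chunk
#   requests.delete(f"{BASE}/chunk/{chunk_id}")                                         # removes the chunk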
716 | 
717 | @app.route("/chunk/<chunk_id>", methods=["PUT"])
718 | def api_update_chunk(chunk_id):
719 |     new_content = request.json.get("content")
720 |     if not new_content:
721 |         return jsonify({"error": "New content is required"}), 400
722 | 
723 |     try:
724 |         return jsonify(_update_chunk_in_db(chunk_id, new_content))
725 |     except Exception as e:
726 |         return jsonify({"error": str(e)}), 500
727 | 
728 | # ---- Source Browsing ----
729 | @app.route("/sources", methods=["GET"])
730 | def get_sources():
731 |     session_id = request.args.get("session_id", "default")
732 |     pipeline = [
733 |         {"$match": {f"metadata.{SESSION_FIELD}": session_id}},
734 |         {
735 |             "$group": {
736 |                 "_id": "$metadata.source",
737 |                 "source_type": {"$first": "$metadata.source_type"},
738 |                 "chunk_count": {"$sum": 1}
739 |             }
740 |         },
741 |         {
742 |             "$project": {
743 |                 "name": "$_id",
744 |                 "type": {"$ifNull": ["$source_type", "unknown"]},
745 |                 "chunk_count": "$chunk_count",
746 |                 "_id": 0
747 |             }
748 |         },
749 |         {"$sort": {"name": 1}}
750 |     ]
751 |     return jsonify(list(config.collection.aggregate(pipeline)))
752 | 
753 | @app.route("/chunks", methods=["GET"])
754 | def get_chunks():
755 |     session_id = request.args.get("session_id", "default")
756 |     source_url = request.args.get("source_url")
757 |     if not source_url:
758 |         return jsonify({"error": "source_url required"}), 400
759 | 
760 |     cursor = config.collection.find(
761 |         {"metadata.source": source_url, f"metadata.{SESSION_FIELD}": session_id},
762 |         {"_id": 1, "text": 1}
763 |     )
764 |     return jsonify([
765 |         {"_id": str(doc["_id"]), "text": doc["text"]}
766 |         for doc in cursor
767 |     ])
768 | 
769 | # --- MODIFIED: Endpoint now returns a readable HTML page ---
770 | @app.route("/source_content", methods=["GET"])
771 | def get_source_content():
772 |     session_id = request.args.get("session_id")
773 |     source = request.args.get("source")
774 | 
775 |     if not session_id or not source:
776 |         return "

Error

Missing 'session_id' or 'source' parameter.

", 400 777 | 778 | try: 779 | chunks_cursor = config.collection.find( 780 | { 781 | f"metadata.{SESSION_FIELD}": session_id, 782 | "metadata.source": source 783 | }, 784 | {"text": 1, "_id": 0} 785 | ).sort("metadata.chunk_index", pymongo.ASCENDING) 786 | 787 | full_content = "".join([chunk.get('text', '') for chunk in chunks_cursor]) 788 | 789 | if not full_content: 790 | return "

Error

Source not found or has no content.

", 404 791 | 792 | # Return a simple, styled HTML page instead of JSON 793 | html_template = """ 794 | 795 | 796 | 797 | 798 | {{ source_name }} 799 | 825 | 826 | 827 |
828 |

Source

829 |

830 | {{ source_name }} 831 |

832 |
833 |
{{ content }}
834 |
835 | 836 | 837 | """ 838 | return render_template_string(html_template, source_name=source, content=full_content) 839 | 840 | except Exception as e: 841 | print_log(f"[ERROR] /source_content: {e}\n{traceback.format_exc()}") 842 | return f"

Error

An unexpected error occurred: {str(e)}

", 500 843 | 844 | 845 | # --- 9. DB INDEX SETUP & LAUNCH --- 846 | def setup_database_and_index(): 847 | print_log("--- 🚀 Initializing DB and Vector Search Indexes ---") 848 | if COLLECTION_NAME not in config.db.list_collection_names(): 849 | config.db.create_collection(COLLECTION_NAME) 850 | 851 | for model_name, model_cfg in EMBEDDING_CONFIG.items(): 852 | if model_name not in config.embedding_clients: 853 | print_log(f"[WARN] Model '{model_name}' is not loaded, skipping index creation.") 854 | continue 855 | 856 | index_name = model_cfg["index_name"] 857 | vector_field = model_cfg["vector_field"] 858 | dims = model_cfg["dimensions"] 859 | 860 | definition = { 861 | "fields": [ 862 | { 863 | "type": "vector", 864 | "path": vector_field, 865 | "numDimensions": dims, 866 | "similarity": "cosine" 867 | }, 868 | { 869 | "type": "filter", 870 | "path": f"metadata.{SESSION_FIELD}" 871 | } 872 | ] 873 | } 874 | 875 | try: 876 | existing = next(config.collection.list_search_indexes(name=index_name), None) 877 | if not existing: 878 | print_log(f"[ACTION] Creating index '{index_name}' for model '{model_name}'...") 879 | config.collection.create_search_index( 880 | model=SearchIndexModel(name=index_name, type="vectorSearch", definition=definition) 881 | ) 882 | print_log(f"[INFO] Finished creating index '{index_name}'.") 883 | else: 884 | print_log(f"[INFO] Index '{index_name}' already exists.") 885 | except OperationFailure as e: 886 | if "already exists" in str(e).lower(): 887 | print_log(f"[INFO] Index '{index_name}' already exists. OK.") 888 | else: 889 | print_log(f"[ERROR] Creating index '{index_name}' failed: {e}") 890 | raise 891 | 892 | @app.route("/search", methods=["POST"]) 893 | def search_web(): 894 | data = request.json 895 | query = data.get("query") 896 | num_results = data.get("num_results", 5) 897 | if not query: 898 | return jsonify({"error": "Query is required"}), 400 899 | try: 900 | print_log(f"[INFO] Web search for: '{query}'") 901 | with DDGS() as ddgs: 902 | results = [r for r in ddgs.text(query, max_results=num_results)] 903 | return jsonify({"status": "success", "results": results}) 904 | except Exception as e: 905 | print_log(f"[ERROR] Web search failed: {e}\n{traceback.format_exc()}") 906 | return jsonify({"error": f"Web search error: {str(e)}"}), 500 907 | 908 | @app.route("/chunk_preview", methods=["POST"]) 909 | def chunk_preview(): 910 | data = request.json 911 | content = data.get("content") 912 | chunk_size = data.get("chunk_size", 1000) 913 | chunk_overlap = data.get("chunk_overlap", 150) 914 | 915 | if not content: 916 | return jsonify({"error": "Content is required"}), 400 917 | 918 | if chunk_overlap >= chunk_size: 919 | return jsonify({"error": "Chunk overlap must be smaller than chunk size."}), 400 920 | 921 | try: 922 | text_splitter = RecursiveCharacterTextSplitter( 923 | chunk_size=chunk_size, 924 | chunk_overlap=chunk_overlap 925 | ) 926 | chunks = text_splitter.split_text(content) 927 | return jsonify({"chunks": chunks}) 928 | except Exception as e: 929 | print_log(f"[ERROR] Chunk preview failed: {e}\n{traceback.format_exc()}") 930 | return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 931 | 932 | if __name__ == "__main__": 933 | setup_database_and_index() 934 | print_log("--- ✅ Setup complete. 
Starting server at http://127.0.0.1:5001 ---") 935 | app.run(debug=True, port=5001) -------------------------------------------------------------------------------- /rag/static/script.js: -------------------------------------------------------------------------------- 1 | // --------------------------- 2 | // Global state and references 3 | // --------------------------- 4 | let currentSessionId = "default"; 5 | let allSessions = []; 6 | let availableModels = []; 7 | let chunkCache = new Map(); 8 | 9 | // Dom references 10 | const chatBox = document.getElementById("chat-box"); 11 | const userInput = document.getElementById("user-input"); 12 | const chatForm = document.getElementById("chat-form"); 13 | const sessionSelector = document.getElementById("session-selector"); 14 | const newSessionBtn = document.getElementById("new-session-btn"); 15 | const clearHistoryBtn = document.getElementById("clear-history-btn"); 16 | const toolButtonsContainer = document.getElementById("tool-buttons"); 17 | const thinkingIndicator = document.getElementById("thinking-indicator"); 18 | 19 | const embeddingModelSelector = document.getElementById("embedding-model-selector"); 20 | const numSourcesInput = document.getElementById("num-sources-input"); 21 | const minScoreInput = document.getElementById("min-score-input"); 22 | const minScoreValue = document.getElementById("min-score-value"); 23 | const maxCharsInput = document.getElementById("max-chunk-length-input"); 24 | const maxCharsValue = document.getElementById("max-chars-value"); 25 | 26 | const previewRagBtn = document.getElementById("preview-rag-btn"); 27 | 28 | // Modal references 29 | const modalOverlay = document.getElementById("modal-overlay"); 30 | const modalContainer = document.getElementById("modal-container"); 31 | const modalTitle = document.getElementById("modal-title"); 32 | const modalText = document.getElementById("modal-text"); 33 | const modalContentHost = document.getElementById("modal-content-host"); 34 | const modalCancelBtn = document.getElementById("modal-btn-cancel"); 35 | const modalSubmitBtn = document.getElementById("modal-btn-submit"); 36 | 37 | // Source browser references 38 | const sourceBrowserOverlay = document.getElementById("source-browser-overlay"); 39 | const sourceBrowserContainer = document.getElementById("source-browser-container"); 40 | const sourceBrowserCloseBtn = document.getElementById("source-browser-close-btn"); 41 | const sourceListEl = document.getElementById("source-list"); 42 | const chunkListEl = document.getElementById("chunk-list"); 43 | const chunkListPlaceholder = document.getElementById("chunk-list-placeholder"); 44 | const sourceBrowserTotalChunks = document.getElementById("source-browser-total-chunks"); 45 | const sourceBrowserSourceCount = document.getElementById("source-browser-source-count"); 46 | const sourceBrowserSelectedChunkCount = document.getElementById("source-browser-selected-chunk-count"); 47 | 48 | // ----------- 49 | // Modal Logic 50 | // ----------- 51 | function showModal({ title, text, contentHTML, onSubmit, onCancel }) { 52 | modalTitle.textContent = title || "Modal Title"; 53 | modalText.textContent = text || ""; 54 | modalContentHost.innerHTML = contentHTML || ""; 55 | 56 | if (onSubmit) { 57 | modalSubmitBtn.onclick = () => { 58 | onSubmit(); 59 | }; 60 | } else { 61 | modalSubmitBtn.onclick = () => { 62 | hideModal(); 63 | }; 64 | } 65 | 66 | if (onCancel) { 67 | modalCancelBtn.onclick = () => { 68 | onCancel(); 69 | }; 70 | } else { 71 | modalCancelBtn.onclick = () => { 72 
| hideModal(); 73 | }; 74 | } 75 | 76 | modalOverlay.classList.remove("invisible", "opacity-0"); 77 | modalContainer.classList.remove("scale-95", "opacity-0"); 78 | } 79 | 80 | function hideModal() { 81 | modalOverlay.classList.add("opacity-0", "invisible"); 82 | modalContainer.classList.add("scale-95", "opacity-0"); 83 | 84 | modalTitle.textContent = ""; 85 | modalText.textContent = ""; 86 | modalContentHost.innerHTML = ""; 87 | modalSubmitBtn.onclick = null; 88 | modalCancelBtn.onclick = null; 89 | } 90 | 91 | modalOverlay.addEventListener("click", (e) => { 92 | if (e.target === modalOverlay) { 93 | hideModal(); 94 | } 95 | }); 96 | 97 | // ------------------------ 98 | // Source Browser Functions 99 | // ------------------------ 100 | function openSourceBrowser() { 101 | sourceBrowserOverlay.classList.remove("opacity-0", "invisible"); 102 | sourceBrowserContainer.classList.remove("scale-95", "opacity-0"); 103 | sourceBrowserTotalChunks.textContent = ""; 104 | sourceBrowserSourceCount.textContent = "Total: 0"; 105 | sourceBrowserSelectedChunkCount.textContent = "Selected: 0"; 106 | 107 | fetch(`/sources?session_id=${encodeURIComponent(currentSessionId)}`) 108 | .then((r) => r.json()) 109 | .then((data) => { 110 | sourceListEl.innerHTML = ""; 111 | chunkListEl.innerHTML = ""; 112 | chunkListPlaceholder.style.display = "block"; 113 | 114 | if (!data || data.length === 0) { 115 | sourceListEl.innerHTML = "

No sources found.

"; 116 | return; 117 | } 118 | 119 | sourceBrowserSourceCount.textContent = `Total: ${data.length}`; 120 | let totalChunks = 0; 121 | data.forEach(src => totalChunks += (src.chunk_count || 0)); 122 | sourceBrowserTotalChunks.textContent = `(${totalChunks.toLocaleString()} Total Chunks)`; 123 | 124 | data.forEach((src) => { 125 | const btn = document.createElement("button"); 126 | btn.className = "source-item"; 127 | 128 | const chunkCount = src.chunk_count !== undefined ? `${src.chunk_count}` : '?'; 129 | const sourceName = src.name + (src.type ? ` (${src.type})` : ""); 130 | 131 | btn.innerHTML = ` 132 | ${escapeHtml(sourceName)} 133 | ${chunkCount} 134 | `; 135 | 136 | btn.onclick = () => { 137 | document.querySelectorAll('.source-item').forEach(b => b.classList.remove('active')); 138 | btn.classList.add('active'); 139 | loadChunksForSource(src.name); 140 | }; 141 | sourceListEl.appendChild(btn); 142 | }); 143 | }) 144 | .catch((err) => { 145 | console.error("Failed to list sources:", err); 146 | sourceListEl.innerHTML = `

Error: ${err.message}

`; 147 | }); 148 | } 149 | 150 | function loadChunksForSource(sourceUrl) { 151 | sourceBrowserSelectedChunkCount.textContent = "Loading..."; 152 | fetch(`/chunks?session_id=${encodeURIComponent(currentSessionId)}&source_url=` + encodeURIComponent(sourceUrl)) 153 | .then((r) => r.json()) 154 | .then((data) => { 155 | chunkListEl.innerHTML = ""; 156 | chunkCache.clear(); 157 | 158 | if (data.error) { 159 | chunkListEl.innerHTML = `

Error: ${data.error}

`; 160 | sourceBrowserSelectedChunkCount.textContent = "Error"; 161 | return; 162 | } 163 | if (!data || data.length === 0) { 164 | chunkListEl.innerHTML = "

No chunks found for this source.

"; 165 | sourceBrowserSelectedChunkCount.textContent = "Selected: 0"; 166 | return; 167 | } 168 | 169 | sourceBrowserSelectedChunkCount.textContent = `Selected: ${data.length}`; 170 | 171 | chunkListPlaceholder.style.display = "none"; 172 | data.forEach((ch) => { 173 | chunkCache.set(ch._id, ch); 174 | const card = document.createElement("div"); 175 | card.className = "chunk-card"; 176 | card.setAttribute("data-chunk-id", ch._id); // Add ID for easier selection 177 | card.innerHTML = ` 178 |
179 |
Chunk ID: ${ch._id}
180 |
181 | 182 | 183 |
184 |
185 |
${marked.parse(ch.text || "")}
186 | `; 187 | chunkListEl.appendChild(card); 188 | }); 189 | }) 190 | .catch((err) => { 191 | console.error("Failed to load chunks:", err); 192 | chunkListEl.innerHTML = `

Error: ${err.message}

`; 193 | sourceBrowserSelectedChunkCount.textContent = "Error"; 194 | }); 195 | } 196 | 197 | // ----------------------------------------------------------- 198 | // CORRECTED: A single, robust event listener for all chunk buttons 199 | // (Handles Edit, Delete, Save, and Cancel) 200 | // ----------------------------------------------------------- 201 | chunkListEl.addEventListener('click', (event) => { 202 | const editButton = event.target.closest('.chunk-edit-btn'); 203 | if (editButton) { 204 | const chunkId = editButton.getAttribute('data-id'); 205 | startChunkEdit(chunkId); 206 | return; 207 | } 208 | 209 | const deleteButton = event.target.closest('.chunk-delete-btn'); 210 | if (deleteButton) { 211 | const chunkId = deleteButton.getAttribute('data-id'); 212 | onDeleteChunkClick(chunkId); // Uses existing delete logic 213 | return; 214 | } 215 | 216 | const saveButton = event.target.closest('.chunk-save-btn'); 217 | if (saveButton) { 218 | const chunkId = saveButton.getAttribute('data-id'); 219 | saveChunkEdit(chunkId); 220 | return; 221 | } 222 | 223 | const cancelButton = event.target.closest('.chunk-cancel-btn'); 224 | if (cancelButton) { 225 | const chunkId = cancelButton.getAttribute('data-id'); 226 | cancelChunkEdit(chunkId); 227 | return; 228 | } 229 | }); 230 | 231 | 232 | sourceBrowserCloseBtn.addEventListener("click", () => { 233 | closeSourceBrowser(); 234 | }); 235 | 236 | function closeSourceBrowser() { 237 | sourceBrowserOverlay.classList.add("opacity-0", "invisible"); 238 | sourceBrowserContainer.classList.add("scale-95", "opacity-0"); 239 | sourceListEl.innerHTML = ""; 240 | chunkListEl.innerHTML = ""; 241 | chunkListPlaceholder.style.display = "block"; 242 | sourceBrowserTotalChunks.textContent = ""; 243 | sourceBrowserSourceCount.textContent = "Total: 0"; 244 | sourceBrowserSelectedChunkCount.textContent = "Selected: 0"; 245 | } 246 | 247 | // ------------------------------------------------- 248 | // --- NEW IN-PLACE CHUNK EDITING LOGIC --- 249 | // ------------------------------------------------- 250 | 251 | function startChunkEdit(chunkId) { 252 | const chunkCard = chunkListEl.querySelector(`.chunk-card[data-chunk-id='${chunkId}']`); 253 | if (!chunkCard || chunkCard.classList.contains('is-editing')) return; 254 | 255 | const chunkData = chunkCache.get(chunkId); 256 | if (!chunkData) { 257 | alert("Error: Could not find chunk data to edit."); 258 | return; 259 | } 260 | 261 | chunkCard.classList.add('is-editing'); 262 | const contentHost = chunkCard.querySelector('.chunk-content'); 263 | const actionsHost = chunkCard.querySelector('.chunk-actions'); 264 | 265 | // Store original HTML for cancellation 266 | chunkCard.dataset.originalContent = contentHost.innerHTML; 267 | chunkCard.dataset.originalActions = actionsHost.innerHTML; 268 | 269 | // Inject the textarea and new buttons 270 | contentHost.innerHTML = ` 271 | 272 | `; 273 | actionsHost.innerHTML = ` 274 | 275 | 276 | `; 277 | 278 | // Auto-resize and focus the textarea 279 | const textarea = contentHost.querySelector('textarea'); 280 | const autoResize = () => { 281 | textarea.style.height = 'auto'; 282 | textarea.style.height = (textarea.scrollHeight) + 'px'; 283 | }; 284 | textarea.addEventListener('input', autoResize); 285 | autoResize(); 286 | textarea.focus(); 287 | } 288 | 289 | function cancelChunkEdit(chunkId) { 290 | const chunkCard = chunkListEl.querySelector(`.chunk-card[data-chunk-id='${chunkId}']`); 291 | if (!chunkCard || !chunkCard.classList.contains('is-editing')) return; 292 | 293 | const 
contentHost = chunkCard.querySelector('.chunk-content');
294 |     const actionsHost = chunkCard.querySelector('.chunk-actions');
295 | 
296 |     // Restore original content from dataset
297 |     contentHost.innerHTML = chunkCard.dataset.originalContent;
298 |     actionsHost.innerHTML = chunkCard.dataset.originalActions;
299 | 
300 |     chunkCard.classList.remove('is-editing');
301 |     delete chunkCard.dataset.originalContent;
302 |     delete chunkCard.dataset.originalActions;
303 | }
304 | 
305 | function saveChunkEdit(chunkId) {
306 |     const chunkCard = chunkListEl.querySelector(`.chunk-card[data-chunk-id='${chunkId}']`);
307 |     if (!chunkCard) return;
308 | 
309 |     const textarea = chunkCard.querySelector('.chunk-edit-textarea');
310 |     const newText = textarea.value;
311 |     const saveBtn = chunkCard.querySelector('.chunk-save-btn');
312 |     saveBtn.textContent = 'Saving...';
313 |     saveBtn.disabled = true;
314 | 
315 |     fetch("/chunk/" + encodeURIComponent(chunkId), {
316 |         method: "PUT",
317 |         headers: { "Content-Type": "application/json" },
318 |         body: JSON.stringify({ content: newText }),
319 |     })
320 |     .then(r => r.json())
321 |     .then(resp => {
322 |         if (resp.error) {
323 |             alert("Error updating chunk: " + resp.error);
324 |             saveBtn.textContent = 'Save';
325 |             saveBtn.disabled = false; // Re-enable on failure
326 |             return;
327 |         }
328 |         // Update local cache
329 |         const chunkData = chunkCache.get(chunkId);
330 |         chunkData.text = newText;
331 |         chunkCache.set(chunkId, chunkData);
332 | 
333 |         // Restore view mode with the *new* content
334 |         const contentHost = chunkCard.querySelector('.chunk-content');
335 |         const actionsHost = chunkCard.querySelector('.chunk-actions');
336 | 
337 |         contentHost.innerHTML = marked.parse(newText);
338 |         actionsHost.innerHTML = chunkCard.dataset.originalActions; // Restore original buttons
339 | 
340 |         chunkCard.classList.remove('is-editing');
341 |         delete chunkCard.dataset.originalContent;
342 |         delete chunkCard.dataset.originalActions;
343 |     })
344 |     .catch(err => {
345 |         alert("Error updating chunk: " + err.message);
346 |         saveBtn.textContent = 'Save';
347 |         saveBtn.disabled = false;
348 |     });
349 | }
350 | 
351 | function onDeleteChunkClick(chunkId) {
352 |     if (!confirm("Are you sure you want to delete this chunk?")) return;
353 |     fetch(`/chunk/${encodeURIComponent(chunkId)}`, { method: "DELETE" })
354 |     .then((r) => r.json())
355 |     .then((resp) => {
356 |         if (resp.error) {
357 |             alert("Error deleting chunk: " + resp.error);
358 |             return;
359 |         }
360 |         const chunkCard = chunkListEl.querySelector(`.chunk-card[data-chunk-id='${chunkId}']`);
361 |         if (chunkCard) {
362 |             chunkCard.remove();
363 |             chunkCache.delete(chunkId);
364 |         }
365 |     })
366 |     .catch((err) => {
367 |         alert("Error deleting chunk: " + err.message);
368 |     });
369 | }
370 | 
371 | // --------
372 | // Helpers
373 | // --------
374 | function escapeHtml(unsafe) {
375 |     if (!unsafe) return "";
376 |     return unsafe.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#039;");
377 | }
378 | 
379 | function escapeHtmlForTextarea(str) {
380 |     return str.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;");
381 | }
382 | 
383 | // ---------------------------------------------
384 | // Chat Rendering (add messages to the chat box)
385 | // ---------------------------------------------
386 | function addBotMessage(message) {
387 |     const content = message.content;
388 |     const sources = message.sources || [];
389 | 
390 |     const messageEl =
document.createElement("div"); 391 | messageEl.className = "message bot-message flex flex-col p-4 bg-gray-700 rounded-lg animate-fade-in-up"; 392 | 393 | const contentDiv = document.createElement("div"); 394 | contentDiv.className = "prose prose-invert max-w-none"; 395 | 396 | if (content.trim().startsWith(' 0) { 404 | let sourceLinksHTML = sources.map(source => { 405 | const href = `/source_content?session_id=${encodeURIComponent(currentSessionId)}&source=${encodeURIComponent(source)}`; 406 | const target = `target="_blank" rel="noopener noreferrer"`; 407 | 408 | let displayName = source; 409 | try { 410 | if (source.startsWith('http')) displayName = new URL(source).hostname; 411 | } catch (e) { /* use original source name */ } 412 | 413 | return ` 414 | 415 | 416 | 417 | 418 | 419 | ${escapeHtml(displayName)} 420 | 421 | `; 422 | }).join(''); 423 | 424 | const sourcesContainer = document.createElement("div"); 425 | sourcesContainer.className = "source-links mt-4 pt-4 border-t border-gray-600"; 426 | sourcesContainer.innerHTML = ` 427 |

Sources

428 |
429 | ${sourceLinksHTML} 430 |
431 | `; 432 | messageEl.appendChild(sourcesContainer); 433 | } 434 | 435 | chatBox.appendChild(messageEl); 436 | chatBox.scrollTop = chatBox.scrollHeight; 437 | } 438 | 439 | function addUserMessage(content) { 440 | const messageEl = document.createElement("div"); 441 | messageEl.className = "message user-message bg-gray-600 p-3 rounded-lg animate-fade-in-up text-right"; 442 | messageEl.textContent = content; 443 | chatBox.appendChild(messageEl); 444 | chatBox.scrollTop = chatBox.scrollHeight; 445 | } 446 | 447 | function addSystemMessage(content) { 448 | const div = document.createElement("div"); 449 | div.className = "message system-message bg-yellow-900/50 text-yellow-300 border-l-4 border-yellow-500 p-3 rounded-r-lg animate-fade-in-up"; 450 | div.innerHTML = `System: ${content}`; 451 | chatBox.appendChild(div); 452 | chatBox.scrollTop = chatBox.scrollHeight; 453 | } 454 | 455 | function setThinking(isThinking) { 456 | const indicator = document.getElementById("thinking-indicator"); 457 | if (isThinking) { 458 | indicator.classList.remove("invisible", "opacity-0"); 459 | chatBox.scrollTop = chatBox.scrollHeight; 460 | } else { 461 | indicator.classList.add("invisible", "opacity-0"); 462 | } 463 | } 464 | 465 | // ------------------------- 466 | // Session / State Functions 467 | // ------------------------- 468 | function loadSessionsAndState() { 469 | fetch("/state") 470 | .then((r) => r.json()) 471 | .then((data) => { 472 | allSessions = data.all_sessions || []; 473 | availableModels = data.available_embedding_models || []; 474 | currentSessionId = data.current_session || "default"; 475 | 476 | sessionSelector.innerHTML = ""; 477 | allSessions.forEach((s) => { 478 | const opt = document.createElement("option"); 479 | opt.value = s; 480 | opt.textContent = s; 481 | if (s === currentSessionId) { 482 | opt.selected = true; 483 | } 484 | sessionSelector.appendChild(opt); 485 | }); 486 | 487 | const selectedModel = embeddingModelSelector.value; 488 | embeddingModelSelector.innerHTML = ""; 489 | availableModels.forEach((m) => { 490 | const opt = document.createElement("option"); 491 | opt.value = m; 492 | opt.textContent = m; 493 | embeddingModelSelector.appendChild(opt); 494 | }); 495 | if (selectedModel && availableModels.includes(selectedModel)) { 496 | embeddingModelSelector.value = selectedModel; 497 | } 498 | }) 499 | .catch((err) => { 500 | console.error("Failed to load state:", err); 501 | }); 502 | } 503 | 504 | function switchSession(sessionId) { 505 | fetch("/chat", { 506 | method: "POST", 507 | headers: { "Content-Type": "application/json" }, 508 | body: JSON.stringify({ 509 | query: `switch_session ${sessionId}`, 510 | session_id: currentSessionId, 511 | }), 512 | }) 513 | .then((r) => r.json()) 514 | .then((data) => { 515 | if (data.error) { 516 | console.error("Error switching session:", data.error); 517 | } else { 518 | loadSessionsAndState(); 519 | chatBox.innerHTML = ''; 520 | const welcomeDiv = document.createElement("div"); 521 | welcomeDiv.className = "message system-message animate-fade-in-up bg-yellow-900/50 text-yellow-300 border-l-4 border-yellow-500 p-4 rounded-r-lg"; 522 | welcomeDiv.innerHTML = `Switched to session: ${sessionId}`; 523 | chatBox.appendChild(welcomeDiv); 524 | } 525 | }) 526 | .catch((err) => console.error("Failed to switch session:", err)); 527 | } 528 | 529 | function createSession(newSessionName) { 530 | fetch("/chat", { 531 | method: "POST", 532 | headers: { "Content-Type": "application/json" }, 533 | body: JSON.stringify({ 534 | query: 
`create_session ${newSessionName}`, 535 | session_id: currentSessionId, 536 | }), 537 | }) 538 | .then((r) => r.json()) 539 | .then((data) => { 540 | if (data.error) { 541 | console.error("Error creating session:", data.error); 542 | alert("Error creating session: " + data.error); 543 | } else { 544 | addSystemMessage(`Created and switched to new session: ${newSessionName}`); 545 | loadSessionsAndState(); 546 | } 547 | }) 548 | .catch((err) => console.error("Failed to create session:", err)); 549 | } 550 | 551 | // ------ 552 | // Events 553 | // ------ 554 | document.addEventListener("DOMContentLoaded", () => { 555 | loadSessionsAndState(); 556 | }); 557 | 558 | sessionSelector.addEventListener("change", () => { 559 | const sel = sessionSelector.value; 560 | if (sel !== currentSessionId) { 561 | switchSession(sel); 562 | } 563 | }); 564 | 565 | newSessionBtn.addEventListener("click", () => { 566 | const name = prompt("Enter new session name:"); 567 | if (name) { 568 | createSession(name.trim()); 569 | } 570 | }); 571 | 572 | clearHistoryBtn.addEventListener("click", () => { 573 | if (!confirm("Clear chat history for this session?")) return; 574 | fetch("/history/clear", { 575 | method: "POST", 576 | headers: { "Content-Type": "application/json" }, 577 | body: JSON.stringify({ session_id: currentSessionId }), 578 | }) 579 | .then((r) => r.json()) 580 | .then((data) => { 581 | if (data.error) { 582 | console.error("Error clearing history:", data.error); 583 | } else { 584 | chatBox.innerHTML = ""; 585 | const welcomeDiv = document.createElement("div"); 586 | welcomeDiv.className = "message system-message animate-fade-in-up bg-yellow-900/50 text-yellow-300 border-l-4 border-yellow-500 p-4 rounded-r-lg"; 587 | welcomeDiv.innerHTML = "Welcome! Use the Control Panel on the right to manage sessions, add data, and fine-tune retrieval settings."; 588 | chatBox.appendChild(welcomeDiv); 589 | } 590 | }) 591 | .catch((err) => console.error("Failed to clear history:", err)); 592 | }); 593 | 594 | chatForm.addEventListener("submit", (event) => { 595 | event.preventDefault(); 596 | const text = userInput.value.trim(); 597 | if (!text) return; 598 | 599 | addUserMessage(text); 600 | setThinking(true); 601 | 602 | const embeddingModel = embeddingModelSelector.value; 603 | const numSources = parseInt(numSourcesInput.value) || 3; 604 | const maxChunkLen = parseInt(maxCharsInput.value) || 2000; 605 | 606 | const payload = { 607 | query: text, 608 | session_id: currentSessionId, 609 | embedding_model: embeddingModel, 610 | rag_params: { 611 | num_sources: numSources, 612 | max_chunk_length: maxChunkLen, 613 | }, 614 | }; 615 | 616 | fetch("/chat", { 617 | method: "POST", 618 | headers: { "Content-Type": "application/json" }, 619 | body: JSON.stringify(payload), 620 | }) 621 | .then((r) => r.json()) 622 | .then((data) => { 623 | if (data.error) { 624 | addBotMessage({ content: `Error: ${data.error}` }); 625 | return; 626 | } 627 | const msgs = data.messages || []; 628 | msgs.forEach((m) => { 629 | if (m.type === "bot-message") { 630 | addBotMessage(m); 631 | } else if (m.type === "system-message") { 632 | addSystemMessage(m.content); 633 | } 634 | }); 635 | if (data.session_update) { 636 | loadSessionsAndState(); 637 | } 638 | }) 639 | .catch((err) => { 640 | addBotMessage({ content: `Error: ${err.message}` }); 641 | }) 642 | .finally(() => { 643 | setThinking(false); 644 | userInput.value = ""; 645 | userInput.focus(); 646 | userInput.style.height = 'auto'; 647 | }); 648 | }); 649 | 650 | 
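// For reference: the /chat round trip driven by the submit handler above (a sketch; the field
// names mirror the payload built there and the JSON returned by the Flask /chat endpoint):
//   request:  { query, session_id, embedding_model, rag_params: { num_sources, max_chunk_length } }
//   response: { messages: [{ type: "bot-message", content, sources }],
//               session_update: { all_sessions, current_session } }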
userInput.addEventListener('input', () => { 651 | userInput.style.height = 'auto'; 652 | userInput.style.height = (userInput.scrollHeight) + 'px'; 653 | }); 654 | 655 | userInput.addEventListener("keydown", (event) => { 656 | if (event.key === "Enter" && !event.shiftKey) { 657 | event.preventDefault(); 658 | chatForm.dispatchEvent(new Event('submit')); 659 | } 660 | }); 661 | 662 | toolButtonsContainer.addEventListener("click", (event) => { 663 | const btn = event.target.closest("button[data-action]"); 664 | if (!btn) return; 665 | const action = btn.getAttribute("data-action"); 666 | handleToolAction(action); 667 | }); 668 | 669 | function handleToolAction(action) { 670 | if (action === "read_url") { 671 | handleReadUrlAndChunking(); 672 | } else if (action === "read_file") { 673 | handleReadFile(); 674 | } else if (action === "browse_sources") { 675 | openSourceBrowser(); 676 | } else if (action === "search_web") { 677 | handleWebSearch(); 678 | } else if (action === "list_sources" || action === "remove_all") { 679 | const command = action === "list_sources" ? "list_sources" : "remove_all_sources"; 680 | if (action === "remove_all" && !confirm("Are you sure you want to remove all sources in this session?")) { 681 | return; 682 | } 683 | 684 | addUserMessage(command); 685 | setThinking(true); 686 | fetch("/chat", { 687 | method: "POST", 688 | headers: { "Content-Type": "application/json" }, 689 | body: JSON.stringify({ query: command, session_id: currentSessionId }), 690 | }) 691 | .then(r => r.json()) 692 | .then(data => { 693 | if (data.error) { 694 | addBotMessage({ content: `Error: ${data.error}` }); 695 | } else { 696 | (data.messages || []).forEach(m => { 697 | if (m.type === "bot-message" || m.type === "system-message") { 698 | addBotMessage(m); 699 | } 700 | }); 701 | } 702 | }) 703 | .catch(err => addBotMessage({ content: `Error: ${err.message}` })) 704 | .finally(() => setThinking(false)); 705 | } 706 | } 707 | 708 | // ------------------------------------ 709 | // --- NEW INGESTION MODAL LOGIC --- 710 | // ------------------------------------ 711 | 712 | async function renderChunkPreview(content, chunkSize, chunkOverlap, targetElementId, countElementId) { 713 | const targetEl = document.getElementById(targetElementId); 714 | const countEl = document.getElementById(countElementId); 715 | if (!targetEl || !countEl) return; 716 | 717 | targetEl.innerHTML = '
'; 718 | countEl.textContent = 'Total Chunks: ...'; 719 | 720 | if (chunkOverlap >= chunkSize) { 721 | targetEl.innerHTML = '

Error: Chunk overlap must be smaller than chunk size.

'; 722 | countEl.textContent = 'Total Chunks: 0'; 723 | return false; 724 | } 725 | 726 | try { 727 | const response = await fetch("/chunk_preview", { 728 | method: "POST", 729 | headers: { "Content-Type": "application/json" }, 730 | body: JSON.stringify({ content, chunk_size: chunkSize, chunk_overlap: chunkOverlap }), 731 | }); 732 | const data = await response.json(); 733 | 734 | if (data.error) { 735 | targetEl.innerHTML = `

Error chunking: ${escapeHtml(data.error)}

`; 736 | countEl.textContent = 'Total Chunks: 0'; 737 | return false; 738 | } 739 | 740 | if (!data.chunks || data.chunks.length === 0) { 741 | targetEl.innerHTML = '

Could not generate any chunks from the source content.

'; 742 | countEl.textContent = 'Total Chunks: 0'; 743 | return false; 744 | } 745 | 746 | const chunkHtml = data.chunks.map((c, i) => ` 747 |
748 |
Chunk ${i + 1}
749 |
${escapeHtml(c)}
750 |
751 | `).join(''); 752 | 753 | targetEl.innerHTML = `
${chunkHtml}
`; 754 | countEl.textContent = `Total Chunks: ${data.chunks.length}`; 755 | return true; 756 | } catch (err) { 757 | targetEl.innerHTML = `

Request error: ${escapeHtml(err.message)}

`; 758 | countEl.textContent = 'Total: 0'; 759 | return false; 760 | } 761 | } 762 | 763 | function handleReadFile() { 764 | let sourceName = ''; 765 | let currentFile = null; 766 | 767 | const modalHTML = ` 768 |
769 | 770 |
771 | 772 | 773 | 774 |

Drag & drop your file here

775 |

or click to browse

776 |
777 | 784 |
785 |
786 |
787 |

Source Content (Editable)

788 |
789 | 790 |
791 |
792 |
793 |
794 |

Chunk Preview

795 | Total: 0 796 |
797 |
798 |

Chunks will appear here.

799 |
800 |
801 |
802 |
803 |
804 | 805 | 806 |
807 |
    <!-- Remaining modal markup elided in this dump. -->
  `;

  showModal({
    title: "Add File to Knowledge Base",
    text: "Drop a file or click the area below, edit content if needed, adjust chunking, and submit to ingest.",
    contentHTML: modalHTML,
    onSubmit: () => {
      const content = document.getElementById('ingestion-source-content-textarea').value;
      if (!content || !sourceName) {
        alert('Please select and load a file first.');
        return;
      }
      const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
      const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);

      if (chunkOverlap >= chunkSize) {
        alert("Chunk overlap must be less than chunk size.");
        return;
      }

      fetch("/ingest", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          content: content,
          source: sourceName,
          source_type: "file",
          session_id: currentSessionId,
          chunk_size: chunkSize,
          chunk_overlap: chunkOverlap,
        }),
      }).then(r => r.json()).then(resp => {
        if (resp.error) {
          alert(`Error ingesting file: ${resp.error}`);
        } else if (resp.task_id) {
          hideModal();
          pollIngestionTask(resp.task_id);
        }
      }).catch(err => alert(`Error: ${err.message}`));
    }
  });

  const dropZone = document.getElementById('file-drop-zone');
  const fileInput = document.getElementById('ingestion-file-input');
  const dropZonePrompt = document.getElementById('file-drop-zone-prompt');
  const dropZoneDisplay = document.getElementById('file-drop-zone-display');
  const fileNameDisplay = document.getElementById('file-name-display');
  const contentTextarea = document.getElementById('ingestion-source-content-textarea');
  const rechunkBtn = document.getElementById('ingestion-rechunk-btn');

  const processFile = (file) => {
    if (!file) return;
    currentFile = file;

    fileNameDisplay.textContent = file.name;
    dropZonePrompt.classList.add('hidden');
    dropZoneDisplay.classList.remove('hidden');
    dropZoneDisplay.classList.add('flex');

    contentTextarea.value = 'Loading file content...';
    const formData = new FormData();
    formData.append('file', file);

    fetch('/preview_file', { method: 'POST', body: formData })
      .then(r => r.json()).then(data => {
        if (data.error) {
          contentTextarea.value = `Error: ${escapeHtml(data.error)}`;
          return;
        }
        sourceName = data.filename;
        contentTextarea.value = data.content;

        const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
        const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);
        renderChunkPreview(data.content, chunkSize, chunkOverlap, 'ingestion-chunk-preview-host', 'ingestion-chunk-count');
      }).catch(err => {
        contentTextarea.value = `Fetch error: ${escapeHtml(err.message)}`;
      });
  };

  dropZone.addEventListener('click', () => fileInput.click());
  dropZone.addEventListener('dragover', (e) => {
    e.preventDefault();
    dropZone.classList.add('drop-zone-dragover');
  });
  dropZone.addEventListener('dragleave', () => dropZone.classList.remove('drop-zone-dragover'));
  dropZone.addEventListener('drop', (e) => {
    e.preventDefault();
    dropZone.classList.remove('drop-zone-dragover');
    if (e.dataTransfer.files.length > 0) {
      fileInput.files = e.dataTransfer.files;
      processFile(e.dataTransfer.files[0]);
    }
  });
  fileInput.addEventListener('change', () => {
    if (fileInput.files.length > 0) {
      processFile(fileInput.files[0]);
    }
  });

  rechunkBtn.addEventListener('click', () => {
    const content = contentTextarea.value;
    if (!content) { alert('Load a file first.'); return; }
    const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
    const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);
    renderChunkPreview(content, chunkSize, chunkOverlap, 'ingestion-chunk-preview-host', 'ingestion-chunk-count');
    rechunkBtn.classList.remove('needs-update');
  });

  contentTextarea.addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
  document.getElementById('ingestion-chunk-size').addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
  document.getElementById('ingestion-chunk-overlap').addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
}
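// Opens the "Add URL" modal: fetch the page via /preview_url, let the user edit the
// extracted markdown and tune chunk size/overlap, preview the resulting chunks, and
// finally POST the edited content to /ingest. If initialUrl is provided (e.g. from a
// web-search result), its content is loaded immediately.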
function handleReadUrlAndChunking(initialUrl = '') {
  // NOTE: the original styled markup for this modal was elided in this dump. The markup
  // below is a minimal reconstruction from the element IDs, labels, and text the handlers
  // in this function depend on; layout, button labels, and default values are assumptions.
  const modalHTML = `
    <input type="text" id="ingestion-url-input" placeholder="https://..." value="${initialUrl}" />
    <button type="button" id="ingestion-load-url-btn">Load URL</button>
    <label for="ingestion-source-content-textarea">Source Content (Editable)</label>
    <textarea id="ingestion-source-content-textarea"></textarea>
    <label for="ingestion-chunk-size">Chunk Size</label>
    <input type="number" id="ingestion-chunk-size" value="1000" />
    <label for="ingestion-chunk-overlap">Chunk Overlap</label>
    <input type="number" id="ingestion-chunk-overlap" value="150" />
    <button type="button" id="ingestion-rechunk-btn">Re-chunk</button>
    <div>Chunk Preview <span id="ingestion-chunk-count">Total: 0</span></div>
    <div id="ingestion-chunk-preview-host">Chunks will appear here.</div>
  `;

  showModal({
    title: "Add URL to Knowledge Base",
    text: "Fetch content, edit if needed, adjust chunking, and submit to ingest.",
    contentHTML: modalHTML,
    onSubmit: () => {
      const url = document.getElementById('ingestion-url-input').value.trim();
      const content = document.getElementById('ingestion-source-content-textarea').value;
      if (!url || !content) {
        alert('Please load the URL content first.');
        return;
      }
      const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
      const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);

      if (chunkOverlap >= chunkSize) {
        alert("Chunk overlap must be less than chunk size.");
        return;
      }

      fetch("/ingest", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          content: content,
          source: url,
          source_type: "url",
          session_id: currentSessionId,
          chunk_size: chunkSize,
          chunk_overlap: chunkOverlap,
        }),
      }).then(r => r.json()).then(resp => {
        if (resp.error) {
          alert(`Error ingesting URL: ${resp.error}`);
        } else if (resp.task_id) {
          hideModal();
          pollIngestionTask(resp.task_id);
        }
      }).catch(err => alert(`Error: ${err.message}`));
    }
  });

  const urlInput = document.getElementById('ingestion-url-input');
  const loadBtn = document.getElementById('ingestion-load-url-btn');
  const contentTextarea = document.getElementById('ingestion-source-content-textarea');
  const rechunkBtn = document.getElementById('ingestion-rechunk-btn');

  const loadUrlContent = () => {
    const url = urlInput.value.trim();
    if (!url) return;
    contentTextarea.value = 'Loading URL content...';

    fetch(`/preview_url?url=${encodeURIComponent(url)}`)
      .then(r => r.json()).then(data => {
        if (data.error) {
          contentTextarea.value = `Error: ${escapeHtml(data.error)}`;
          return;
        }
        contentTextarea.value = data.markdown;

        const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
        const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);
        renderChunkPreview(data.markdown, chunkSize, chunkOverlap, 'ingestion-chunk-preview-host', 'ingestion-chunk-count');
      }).catch(err => {
        contentTextarea.value = `Fetch error: ${escapeHtml(err.message)}`;
      });
  };

  loadBtn.addEventListener('click', loadUrlContent);
  rechunkBtn.addEventListener('click', () => {
    const content = contentTextarea.value;
    if (!content) { alert('Load URL content first.'); return; }
    const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
    const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);
    renderChunkPreview(content, chunkSize, chunkOverlap, 'ingestion-chunk-preview-host', 'ingestion-chunk-count');
    rechunkBtn.classList.remove('needs-update');
  });

  contentTextarea.addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
  document.getElementById('ingestion-chunk-size').addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
  document.getElementById('ingestion-chunk-overlap').addEventListener('input', () => rechunkBtn.classList.add('needs-update'));

  if (initialUrl) {
    loadUrlContent();
  }
}
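// Runs a DuckDuckGo-backed web search via the /search endpoint and renders the results
// as chat messages; each result exposes a .read-url-btn that hands its URL to
// handleReadUrlAndChunking for ingestion.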
function handleWebSearch() {
  showModal({
    title: "Search the Web",
    text: "Enter your search query to do a DuckDuckGo-based web search:",
    // NOTE: the original input markup was elided in this dump; reconstructed minimally.
    contentHTML: `<input type="text" id="web-search-input" placeholder="Search query..." />`,
    onSubmit: () => {
      const query = document.getElementById("web-search-input").value.trim();
      if (!query) {
        alert("No query provided");
        return;
      }
      hideModal();
      addUserMessage(`web_search ${query}`);
      setThinking(true);

      fetch("/search", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ query, num_results: 5 }),
      })
        .then((r) => r.json())
        .then((data) => {
          if (data.error) {
            addBotMessage({ content: `Web search error: ${data.error}` });
          } else if (data.results && data.results.length > 0) {
            const resultsHtml = data.results.map((r) => {
              const isValidUrl = r.href && (r.href.startsWith('http://') || r.href.startsWith('https://'));
              const url = isValidUrl ? r.href : '#';
              let host = 'N/A';
              if (isValidUrl) {
                try {
                  host = new URL(url).hostname;
                } catch (e) { console.error('Failed to parse URL', e); }
              }

              // NOTE: the original result-card markup was elided in this dump. The markup
              // below is a minimal reconstruction; the .read-url-btn button and its
              // data-url attribute are required by the click handler wired up further down.
              return `
                <div>
                  <a href="${url}" target="_blank" rel="noopener">${escapeHtml(r.title)}</a>
                  <button type="button" class="read-url-btn" data-url="${url}">Read URL</button>
                  <p>${escapeHtml(r.body)}</p>
                  <span>${host}</span>
                </div>
              `;
            }).join('');

            // NOTE: wrapper markup elided in this dump; only the heading text is recoverable.
            addBotMessage({ content: `<div>Web Search Results:</div>${resultsHtml}` });

            document.querySelectorAll('.read-url-btn').forEach(button => {
              button.addEventListener('click', (e) => {
                const url = e.target.closest('button').getAttribute('data-url');
                if (url && url !== '#') {
                  handleReadUrlAndChunking(url);
                }
              });
            });

          } else {
            addBotMessage({ content: "No web search results found." });
          }
        })
        .catch((err) => {
          addBotMessage({ content: `Web search error: ${err.message}` });
        })
        .finally(() => {
          setThinking(false);
        });
    },
  });
}
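// "Preview RAG context" button: sends the current query to /preview_search and shows the
// retrieved chunks (score, source, content) that would be used as context, filtered
// client-side by the minimum-score slider.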
previewRagBtn.addEventListener("click", () => {
  const text = userInput.value.trim();
  if (!text) {
    alert("Type your query in the box first.");
    return;
  }
  const embeddingModel = embeddingModelSelector.value;
  const numSources = parseInt(numSourcesInput.value) || 3;
  const minScore = parseFloat(minScoreInput.value) || 0;
  fetch("/preview_search", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      query: text,
      session_id: currentSessionId,
      embedding_model: embeddingModel,
      num_sources: numSources,
    }),
  })
    .then((r) => r.json())
    .then((data) => {
      if (data.error) {
        alert(`Preview error: ${data.error}`);
        return;
      }
      const filteredData = data.filter(res => res.score >= minScore);
      if (!Array.isArray(filteredData) || filteredData.length === 0) {
        alert("No results found for the given query and minimum score.");
        return;
      }
      let previewContent = filteredData
        .map((res, idx) => {
          return `(${idx + 1}) Score: ${res.score.toFixed(4)} | Source: ${res.source}\n${res.content}\n---\n`;
        })
        .join("");
      showModal({
        title: "RAG Context Preview",
        text: "Retrieved chunks for your current query:",
        // NOTE: the wrapper markup was elided in this dump; a plain <pre> is assumed here.
        contentHTML: `<pre>${escapeHtml(previewContent)}</pre>`,
      });
    })
    .catch((err) => {
      alert(`Preview request failed: ${err.message}`);
    });
});

minScoreInput.addEventListener("input", () => {
  minScoreValue.textContent = parseFloat(minScoreInput.value).toFixed(2);
});
maxCharsInput.addEventListener("input", () => {
  maxCharsValue.textContent = parseInt(maxCharsInput.value);
});
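// Polls /ingest/status/<task_id> every 2 seconds until the background ingestion task
// reports 'complete' or 'failed', then refreshes the session/source state on success.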
function pollIngestionTask(taskId) {
  const checkStatus = () => {
    fetch(`/ingest/status/${taskId}`)
      .then(r => r.json())
      .then(data => {
        if (data.status === 'complete') {
          addSystemMessage(`Ingestion successful! ${data.message}`);
          loadSessionsAndState();
        } else if (data.status === 'failed') {
          addSystemMessage(`Ingestion failed: ${data.message}`);
        } else {
          setTimeout(checkStatus, 2000);
        }
      })
      .catch(err => {
        addSystemMessage(`Failed to get ingestion status: ${err.message}`);
      });
  };
  addSystemMessage(`Ingestion started with Task ID: ${taskId}. This may take a moment.`);
  setTimeout(checkStatus, 2000);
}
--------------------------------------------------------------------------------