├── images ├── rag.png ├── learn.png ├── IRAG-V.gif ├── answer.png ├── chunkviz.png ├── forget.png ├── mod_rag.png ├── no_synth.png ├── question.png ├── RAG-chunks.png ├── change_rag.png ├── chunkviz-1.png ├── embeddings.jpg ├── embeddings.png ├── llm_agent.png ├── mod_rag-2.png ├── rag-agent.png ├── reset_chat.png ├── search-00.png ├── search-01.png ├── search-02.png ├── search-03.png ├── with_synth.png ├── add_sources.png ├── ask_question.png ├── list_sources.png ├── mdb_diagram.png ├── remove_source.png ├── scale_tools.png ├── search-01-1.png ├── search-02-1.png ├── answer_refined.png ├── remove_sources.png ├── actionweaver_mdb.png ├── function_calling.jpeg └── function_calling.png ├── irag-2025.png ├── irag-chunk-mgmt.png ├── .gitignore ├── score-fusion.md ├── implementation.md ├── rag ├── static │ ├── styles.css │ └── script.js ├── templates │ └── index.html └── app.py ├── README.md ├── LICENSE ├── chunk.md └── blog.md /images/rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/rag.png -------------------------------------------------------------------------------- /irag-2025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/irag-2025.png -------------------------------------------------------------------------------- /images/learn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/learn.png -------------------------------------------------------------------------------- /images/IRAG-V.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/IRAG-V.gif -------------------------------------------------------------------------------- /images/answer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/answer.png -------------------------------------------------------------------------------- /images/chunkviz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/chunkviz.png -------------------------------------------------------------------------------- /images/forget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/forget.png -------------------------------------------------------------------------------- /images/mod_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/mod_rag.png -------------------------------------------------------------------------------- /images/no_synth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/no_synth.png -------------------------------------------------------------------------------- /images/question.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/question.png 
-------------------------------------------------------------------------------- /irag-chunk-mgmt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/irag-chunk-mgmt.png -------------------------------------------------------------------------------- /images/RAG-chunks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/RAG-chunks.png -------------------------------------------------------------------------------- /images/change_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/change_rag.png -------------------------------------------------------------------------------- /images/chunkviz-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/chunkviz-1.png -------------------------------------------------------------------------------- /images/embeddings.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/embeddings.jpg -------------------------------------------------------------------------------- /images/embeddings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/embeddings.png -------------------------------------------------------------------------------- /images/llm_agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/llm_agent.png -------------------------------------------------------------------------------- /images/mod_rag-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/mod_rag-2.png -------------------------------------------------------------------------------- /images/rag-agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/rag-agent.png -------------------------------------------------------------------------------- /images/reset_chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/reset_chat.png -------------------------------------------------------------------------------- /images/search-00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-00.png -------------------------------------------------------------------------------- /images/search-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-01.png -------------------------------------------------------------------------------- /images/search-02.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-02.png -------------------------------------------------------------------------------- /images/search-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-03.png -------------------------------------------------------------------------------- /images/with_synth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/with_synth.png -------------------------------------------------------------------------------- /images/add_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/add_sources.png -------------------------------------------------------------------------------- /images/ask_question.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/ask_question.png -------------------------------------------------------------------------------- /images/list_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/list_sources.png -------------------------------------------------------------------------------- /images/mdb_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/mdb_diagram.png -------------------------------------------------------------------------------- /images/remove_source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/remove_source.png -------------------------------------------------------------------------------- /images/scale_tools.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/scale_tools.png -------------------------------------------------------------------------------- /images/search-01-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-01-1.png -------------------------------------------------------------------------------- /images/search-02-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/search-02-1.png -------------------------------------------------------------------------------- /images/answer_refined.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/answer_refined.png -------------------------------------------------------------------------------- /images/remove_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/remove_sources.png -------------------------------------------------------------------------------- 
/images/actionweaver_mdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/actionweaver_mdb.png -------------------------------------------------------------------------------- /images/function_calling.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/function_calling.jpeg -------------------------------------------------------------------------------- /images/function_calling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranfysvalle02/Interactive-RAG/HEAD/images/function_calling.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore virtual environments 2 | .venv 3 | env/ 4 | venv/ 5 | 6 | # Ignore IDE specific settings 7 | .idea/ 8 | .vscode/ 9 | *.pyc 10 | 11 | # Ignore build files 12 | build/ 13 | dist/ 14 | *.egg-info/ 15 | 16 | # Ignore temporary files 17 | *.pyo 18 | *.swp 19 | *~ 20 | 21 | # Ignore test coverage reports 22 | .coverage 23 | 24 | # Ignore local configurations 25 | .env 26 | 27 | # Ignore documentation generated files 28 | docs/_build/ 29 | 30 | # Ignore mypy and pytype cache 31 | .mypy_cache/ 32 | .dmypy.json 33 | dmypy.json 34 | .pyre/ 35 | 36 | # Ignore Cython debug symbols 37 | cython_debug/ 38 | 39 | # Ignore pytest cache 40 | .pytest_cache/ 41 | 42 | # Ignore pipenv specific files 43 | Pipfile.lock -------------------------------------------------------------------------------- /score-fusion.md: -------------------------------------------------------------------------------- 1 | ## Relative Score Fusion for Enhanced Search Results 2 | 3 | This blog post explores a method for combining the power of vector search and full-text search through relative score fusion. This approach utilizes the Sphere dataset, a large corpus for knowledge-intensive NLP tasks. 4 | 5 | **The Scenario:** 6 | 7 | We aim to search for information about companies from the "names" list using both vector search and full-text search. Vector search leverages sentence embeddings for semantic similarity, while full-text search focuses on keyword matching. 8 | 9 | **The Approach:** 10 | 11 | 1. **Data Setup:** 12 | - Sentence embeddings are generated for each company name using the Facebook DPR question encoder model. 13 | - MongoDB collections are used to store the data and facilitate queries. 14 | 2. **Pipeline Breakdown:** 15 | - **Vector Search:** 16 | - The "$vectorSearch" aggregation operator searches for documents with similar vector representations. 17 | - The retrieved documents are assigned a "vs_score" based on their search score and scaled using pre-defined parameters. 18 | - **Full Text Search:** 19 | - The "$search" operator performs full-text search based on the company name. 20 | - Matching documents are assigned an "fts_score" and scaled similarly. 21 | - **Relative Score Fusion:** 22 | - Both sets of results are combined using "$unionWith". 23 | - The "$group" operator aggregates the maximum scores for each document across both search methods. 24 | - The final score is calculated by adding the scaled "vs_score" and "fts_score" for each document. 
25 | - The results are then sorted by the final score in descending order, presenting the most relevant documents first. 26 | 27 | **CODE:** 28 | 29 | gist available here: https://gist.github.com/hweller1/d6dbd5036ae4366108b534a0f1662a20 30 | 31 | ``` 32 | vector_agg_with_lookup = [ 33 | { 34 | "$vectorSearch": { 35 | "index": "vector", 36 | "path": "vector", 37 | "queryVector": embedding.tolist(), 38 | "numCandidates": k * overrequest_factor, 39 | "limit": k * 2 40 | } 41 | }, 42 | {"$addFields": {"vs_score": {"$meta": "searchScore"}}}, 43 | { 44 | "$project": { 45 | "vs_score": {"$multiply": ["$vs_score", vector_scalar / vector_normalization]}, 46 | "_id": 1, 47 | "raw": 1, 48 | } 49 | }, 50 | { 51 | "$unionWith": { 52 | "coll": "sphere1mm", 53 | "pipeline": [ 54 | { 55 | "$search": { 56 | "index": "fts_sphere", 57 | "text": {"query": query, "path": "raw"}, 58 | } 59 | }, 60 | {"$limit": k * 2}, 61 | {"$addFields": {"fts_score": {"$meta": "searchScore"}}}, 62 | { 63 | "$project": { 64 | "fts_score": {"$multiply": ["$fts_score", fts_scalar / fts_normalization]}, 65 | "_id": 1, 66 | "raw": 1, 67 | } 68 | }, 69 | ], 70 | } 71 | }, 72 | { 73 | "$group": { 74 | "_id": "$raw", 75 | "vs_score": {"$max": "$vs_score"}, 76 | "fts_score": {"$max": "$fts_score"}, 77 | } 78 | }, 79 | { 80 | "$project": { 81 | "_id": 1, 82 | "raw": 1, 83 | "vs_score": {"$ifNull": ["$vs_score", 0]}, 84 | "fts_score": {"$ifNull": ["$fts_score", 0]}, 85 | } 86 | }, 87 | { 88 | "$project": { 89 | "raw": 1, 90 | "score": {"$add": ["$fts_score", "$vs_score"]}, 91 | "_id": 1, 92 | "vs_score": 1, 93 | "fts_score": 1, 94 | } 95 | }, 96 | {"$limit": k}, 97 | {"$sort": {"score": -1}}, 98 | ] 99 | ``` 100 | 101 | **Benefits:** 102 | 103 | This relative score fusion method offers several advantages: 104 | 105 | - **Improved Search Relevance:** By combining vector search and full-text search, the results capture both semantic similarity and keyword relevance, leading to more accurate and comprehensive answers. 106 | - **Flexibility:** The scaling factors for each score can be adjusted to prioritize either vector search or full-text search based on the specific needs and data characteristics. 107 | - **Scalability:** The aggregation framework allows for efficient execution of the search queries even for large datasets. 108 | 109 | **Future Directions:** 110 | 111 | This work opens up exciting possibilities for further exploration: 112 | 113 | - Investigating different score fusion techniques and weighting schemes. 114 | - Integrating the approach with other search methods, such as entity search. 115 | - Adapting the method to different datasets and NLP applications. 116 | 117 | By leveraging relative score fusion, we can unlock the potential of hybrid search for enhanced information retrieval and deeper understanding of complex queries. 118 | -------------------------------------------------------------------------------- /implementation.md: -------------------------------------------------------------------------------- 1 | ## What is an Agent anyway? 2 | 3 | An agent is a computer program or system designed to perceive its environment, make decisions, and achieve specific goals. 4 | 5 | Think of an agent as a software entity that displays some degree of autonomy and performs actions in its environment on behalf of its user or owner, but in a relatively independent way. It takes initiatives to perform actions on its own by deliberating its options to achieve its goal(s). 
The core idea of agents is to use a language model to choose a sequence of actions to take. In contrast to chains, where a sequence of actions is hardcoded, agents use a language model as a reasoning engine to determine which actions to take and in which order. 6 | 7 | # Building an Interactive-RAG Agent 8 | 9 | Using [ActionWeaver](https://github.com/TengHu/ActionWeaver/tree/main), a lightweight wrapper for the function calling API, we can build a user proxy agent that efficiently retrieves and ingests relevant information using MongoDB Atlas. 10 | 11 | A proxy agent is a middleman that sends client requests to other servers or resources and then brings the responses back. 12 | 13 | This agent presents the data to the user in an interactive and customizable manner, enhancing the overall user experience. 14 | 15 | The `UserProxyAgent` has several RAG parameters that can be customized, such as `source_chunk_size` (e.g. 1000), `num_sources` (e.g. 2), `unique` (e.g. True), and `min_rel_score` (e.g. 0.00). 16 | 17 | ``` 18 | class UserProxyAgent: 19 | def __init__(self, logger, st): 20 | # CHUNK RETRIEVAL STRATEGY 21 | self.rag_config = { 22 | "num_sources": 2, 23 | "source_chunk_size": 1000, 24 | "min_rel_score": 0.00, 25 | "unique": True, 26 | } 27 | ``` 28 | 29 | ``` 30 | class RAGAgent(UserProxyAgent): 31 | def __call__(self, text): 32 | text = self.preprocess_query(text) 33 | # PROMPT ENGINEERING HELPS THE LLM TO SELECT THE BEST ACTION/TOOL 34 | agent_rules = f""" 35 | We will be playing a special game. Trust me, you do not want to lose. 36 | 37 | ## RULES 38 | - DO NOT ANSWER DIRECTLY 39 | - ALWAYS USE ONE OF YOUR AVAILABLE ACTIONS/TOOLS. 40 | - PREVIOUS MESSAGES IN THE CONVERSATION MUST BE CONSIDERED WHEN SELECTING THE BEST ACTION/TOOL 41 | - NEVER ASK FOR USER CONSENT TO PERFORM AN ACTION. ALWAYS PERFORM IT ON THE USER'S BEHALF. 42 | Given the following user prompt, select the correct action/tool from your available functions/tools/actions. 43 | 44 | ## USER PROMPT 45 | {text} 46 | ## END USER PROMPT 47 | 48 | SELECT THE BEST TOOL FOR THE USER PROMPT! BEGIN! 49 | """ 50 | self.messages += [{"role": "user", "content": agent_rules + "\n\n## IMPORTANT! REMEMBER THE GAME RULES! DO NOT ANSWER DIRECTLY! IF YOU ANSWER DIRECTLY YOU WILL LOSE. BEGIN!"}] 51 | if ( 52 | len(self.messages) > 2 53 | ): 54 | # if we have more than 2 messages, we may run into: 'code': 'context_length_exceeded' 55 | # we only need the last few messages to know which source to add or remove 56 | response = self.llm.create( 57 | messages=self.messages[-2:], 58 | actions=[ 59 | self.read_url, 60 | self.answer_question, 61 | self.remove_source, 62 | self.reset_messages, 63 | self.show_messages, 64 | self.iRAG, 65 | self.get_sources_list, 66 | self.search_web 67 | ], 68 | stream=False, 69 | ) 70 | else: 71 | response = self.llm.create( 72 | messages=self.messages, 73 | actions=[ 74 | self.read_url, 75 | self.answer_question, 76 | self.remove_source, 77 | self.reset_messages, 78 | self.show_messages, 79 | self.iRAG, 80 | self.get_sources_list, 81 | self.search_web 82 | ], 83 | stream=False, 84 | ) 85 | return response 86 | ``` 87 | 88 | ## Why Choose ActionWeaver? 89 | Here are some key benefits that influenced our decision to choose ActionWeaver: 90 | 1. Lightweight and Single-Purposed: ActionWeaver is very lightweight and designed with a singular focus on building LLM applications with function calling. This specialization ensures that it excels in its core function without unnecessary complexity. 91 | 2. 
Ease of Use: ActionWeaver streamlines the process of integrating external tools into agent's toolkit. Using a simple decorator, developers can effortlessly add any Python function, and it also provides the flexibility to include tools from other ecosystems like LangChain or Llama Index. 92 | 3. Versatility: Despite its simplicity, ActionWeaver offers a wide range of capabilities, including support for forced function execution, parallel function calling and structured data extraction. Such versatility makes it a Swiss Army knife, equipped to handle a variety of AI-related tasks and adapt seamlessly to changing project demands. 93 | 4. Minimal Dependency: ActionWeaver has minimal dependencies, relying only on the openai and pydantic libraries. This reduces the overhead of managing dependencies. 94 | 5. Complex Function Orchestration: The framework empowers us to create intricate sequences of function calls, allowing us to build complex hierarchies or chains of functions. This capability enables us to execute sophisticated workflows with ease. 95 | 96 | ## Key features of OpenAI function calling: 97 | - Function calling allows you to connect large language models to external tools. 98 | - The Chat Completions API generates JSON that can be used to call functions in your code. 99 | - The latest models have been trained to detect when a function should be called and respond with JSON that adheres to the function signature. 100 | - Building user confirmation flows is recommended before taking actions that impact the world on behalf of users. 101 | - Function calling can be used to create assistants that answer questions by calling external APIs, convert natural language into API calls, and extract structured data from text. 102 | - The basic sequence of steps for function calling involves calling the model, parsing the JSON response, calling the function with the provided arguments, and summarizing the results back to the user. 103 | - Function calling is supported by specific model versions, including gpt-4 and gpt-3.5-turbo. 104 | - Parallel function calling allows multiple function calls to be performed together, reducing round-trips with the API. 105 | - Tokens are used to inject functions into the system message and count against the model's context limit and billing. 106 | 107 | ![](./images/function_calling.png) 108 | 109 | Read more at: https://thinhdanggroup.github.io/function-calling-openai/ 110 | 111 | ## ActionWeaver Basics: Actions 112 | 113 | Actions are functions that an agent can invoke. There are two important design considerations around actions: 114 | 115 | Giving the agent access to the right actions 116 | Describing the actions in a way that is most helpful to the agent 117 | 118 | ## Crafting Actions for Effective Agents 119 | 120 | **Actions are the lifeblood of an agent's decision-making.** They define the options available to the agent and shape its interactions with the environment. Consequently, designing effective actions is crucial for building successful agents. 121 | 122 | **Two key considerations guide this design process:** 123 | 124 | 1. **Access to relevant actions:** Ensure the agent has access to actions necessary to achieve its objectives. Omitting critical actions limits the agent's capabilities and hinders its performance. 125 | 2. **Action description clarity:** Describe actions in a way that is informative and unambiguous for the agent. Vague or incomplete descriptions can lead to misinterpretations and suboptimal decisions. 
126 | 127 | **By carefully designing actions that are both accessible and well-defined, you equip your agent with the tools and knowledge necessary to navigate its environment and achieve its objectives.** 128 | 129 | **Further considerations:** 130 | 131 | * **Granularity of actions:** Should actions be high-level or low-level? High-level actions offer greater flexibility but require more decision-making, while low-level actions offer more control but limit adaptability. 132 | * **Action preconditions and effects:** Clearly define the conditions under which an action can be taken and its potential consequences. This helps the agent understand the implications of its choices. 133 | 134 | 135 | If you don't give the agent the right actions and describe them in an effective way, you won’t be able to build a working agent. 136 | 137 | ![](./images/llm_agent.png) 138 | 139 | An LLM is then called, resulting in either a response to the user OR action(s) to be taken. If it is determined that a response is required, then that is passed to the user, and that cycle is finished. If it is determined that an action is required, that action is then taken, and an observation (action result) is made. That action and its corresponding observation are added back to the prompt (we call this an “agent scratchpad”), and the loop resets, i.e., the LLM is called again (with the updated agent scratchpad). 140 | 141 | ![](./images/scale_tools.png) 142 | 143 | The ActionWeaver agent framework is an AI application framework that puts function calling at its core. It is designed to enable the seamless merging of traditional computing systems with the powerful reasoning capabilities of Large Language Models. 144 | ActionWeaver is built around the concept of LLM function calling, while popular frameworks like LangChain and Haystack are built around the concept of pipelines. 145 | 146 | ## Key features of ActionWeaver include: 147 | - Ease of Use: ActionWeaver allows developers to add any Python function as a tool with a simple decorator. The decorated method's signature and docstring are used as a description and passed to OpenAI's function API. 148 | - Function Calling as First-Class Citizen: Function calling is at the core of the framework. 149 | - Extensibility: Integration of any Python code into the agent's toolbox with a single line of code, including tools from other ecosystems like LangChain or Llama Index. 150 | - Function Orchestration: Building complex orchestrations of function calls, including intricate hierarchies or chains. 151 | - Debuggability: Structured logging improves the developer experience. 
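The "Ease of Use" point above is easiest to see in code. Below is a minimal sketch of registering a plain Python function as an action and passing it to the wrapped client, mirroring the `llm.create(..., actions=[...])` call in the `RAGAgent` shown earlier. The exact import paths (`action`, `wrap`), the `list_sources` function, and the model name are assumptions for illustration rather than a definitive ActionWeaver recipe; check the ActionWeaver README for the API of your installed version.

```
from openai import OpenAI
from actionweaver import action      # assumed import path for the decorator
from actionweaver.llms import wrap   # assumed helper that lets `create` accept `actions=[...]`

# Wrap the chat completions client so it can dispatch to registered actions.
llm = wrap(OpenAI().chat.completions)

@action(name="ListSources")
def list_sources() -> str:
    """
    List the sources currently ingested into the knowledge base.
    The signature and this docstring are what the LLM sees when choosing a tool.
    """
    return "Sources: []"  # placeholder implementation

response = llm.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "What sources do you have access to?"}],
    actions=[list_sources],
    stream=False,
)
print(response)
```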
-------------------------------------------------------------------------------- /rag/static/styles.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --mongodb-green-500: #00ED64; 3 | --mongodb-green-600: #00D159; 4 | --gray-900: #121826; 5 | --gray-800: #1d2333; 6 | --gray-700: #333c51; 7 | --gray-600: #4b5563; 8 | --gray-400: #8b94a9; 9 | --blue-500: #3b82f6; 10 | --blue-600: #2563eb; 11 | --yellow-400: #facc15; 12 | --purple-400: #c084fc; 13 | --purple-500: #8b5cf6; 14 | } 15 | 16 | /* Scrollbar styling */ 17 | ::-webkit-scrollbar { 18 | width: 8px; 19 | } 20 | ::-webkit-scrollbar-track { 21 | background: var(--gray-900); 22 | } 23 | ::-webkit-scrollbar-thumb { 24 | background: var(--gray-700); 25 | border-radius: 4px; 26 | } 27 | ::-webkit-scrollbar-thumb:hover { 28 | background: var(--gray-400); 29 | } 30 | 31 | @keyframes fadeIn { 32 | from { opacity: 0; } 33 | to { opacity: 1; } 34 | } 35 | @keyframes fadeInUp { 36 | from { opacity: 0; transform: translateY(15px); } 37 | to { opacity: 1; transform: translateY(0); } 38 | } 39 | .animate-fade-in-up { 40 | animation: fadeInUp 0.4s ease-out forwards; 41 | } 42 | 43 | .typing-dot { 44 | width: 8px; 45 | height: 8px; 46 | background-color: var(--gray-400); 47 | border-radius: 50%; 48 | display: inline-block; 49 | margin: 0 2px; 50 | opacity: 0.6; 51 | animation: typing-bubble 1.2s infinite ease-in-out; 52 | } 53 | .typing-dot:nth-child(2) { animation-delay: 0.15s; } 54 | .typing-dot:nth-child(3) { animation-delay: 0.30s; } 55 | @keyframes typing-bubble { 56 | 0%, 80%, 100% { transform: scale(0.6); opacity: 0.5; } 57 | 40% { transform: scale(1); opacity: 1; } 58 | } 59 | 60 | .tooltip-container { position: relative; } 61 | .tooltip-text { 62 | visibility: hidden; 63 | opacity: 0; 64 | width: max-content; 65 | background-color: var(--gray-900); 66 | color: #fff; 67 | text-align: center; 68 | border-radius: 6px; 69 | padding: 5px 10px; 70 | position: absolute; 71 | z-index: 10; 72 | bottom: 125%; 73 | right: 0; 74 | transition: opacity 0.2s, visibility 0.2s; 75 | font-size: 0.8rem; 76 | pointer-events: none; 77 | border: 1px solid var(--gray-600); 78 | } 79 | .tooltip-container:hover .tooltip-text { 80 | visibility: visible; 81 | opacity: 1; 82 | } 83 | 84 | @keyframes spin { 85 | to { transform: rotate(360deg); } 86 | } 87 | .spinner-large { 88 | width: 2.5rem; 89 | height: 2.5rem; 90 | border: 4px solid var(--mongodb-green-500); 91 | border-right-color: transparent; 92 | border-radius: 50%; 93 | animation: spin .8s linear infinite; 94 | } 95 | 96 | /* Magical Buttons */ 97 | .btn { 98 | font-weight: 600; 99 | padding: 0.5rem 1.25rem; 100 | border-radius: 8px; 101 | border: none; 102 | cursor: pointer; 103 | transition: transform 0.2s cubic-bezier(0.34, 1.56, 0.64, 1), box-shadow 0.2s ease, background-position 0.3s ease; 104 | user-select: none; 105 | outline: none; 106 | display: inline-flex; 107 | align-items: center; 108 | justify-content: center; 109 | } 110 | .btn-primary { 111 | background-image: linear-gradient(145deg, var(--mongodb-green-500) 0%, #00c753 100%); 112 | background-size: 200% 100%; 113 | background-position: right bottom; 114 | color: var(--gray-900); 115 | box-shadow: 0 2px 4px rgba(0,0,0,0.2), inset 0 1px 1px rgba(255,255,255,0.4), inset 0 -1px 1px rgba(0,0,0,0.2); 116 | } 117 | .btn-primary:hover:not(:disabled) { 118 | background-position: left bottom; 119 | transform: translateY(-2px); 120 | box-shadow: 0 4px 8px rgba(0,0,0,0.3), inset 0 1px 1px 
rgba(255,255,255,0.4), inset 0 -1px 1px rgba(0,0,0,0.2); 121 | } 122 | .btn-primary:active:not(:disabled) { 123 | transform: translateY(1px); 124 | box-shadow: 0 1px 2px rgba(0,0,0,0.3), inset 0 1px 2px rgba(0,0,0,0.4); 125 | transition-duration: 0.1s; 126 | } 127 | .btn-primary:disabled { 128 | opacity: 0.6; 129 | cursor: not-allowed; 130 | } 131 | .btn-secondary { 132 | background-color: var(--gray-700); 133 | border: 1px solid var(--gray-600); 134 | color: white; 135 | box-shadow: 0 2px 4px rgba(0,0,0,0.2); 136 | } 137 | .btn-secondary:hover:not(:disabled) { 138 | background-color: var(--gray-600); 139 | transform: translateY(-2px); 140 | box-shadow: 0 4-8px rgba(0,0,0,0.3); 141 | } 142 | .btn-secondary:active:not(:disabled) { 143 | transform: translateY(1px); 144 | box-shadow: 0 1px 2px rgba(0,0,0,0.3); 145 | transition-duration: 0.1s; 146 | } 147 | .btn.needs-update { 148 | animation: pulseGreen 1.5s infinite; 149 | } 150 | 151 | /* Chat bubble redesign */ 152 | .message.bot-message .bot-avatar { 153 | align-self: flex-start; 154 | transform: translateY(4px); /* Minor vertical alignment adjustment */ 155 | } 156 | .message.bot-message .bot-content { 157 | border-radius: 0.5rem 1.5rem 1.5rem 1.5rem; 158 | } 159 | .message.user-message .user-content { 160 | background-color: var(--mongodb-green-500); 161 | color: white; 162 | border-radius: 1.5rem 0.5rem 1.5rem 1.5rem; 163 | } 164 | 165 | /* Chat bubble spacing */ 166 | .chat-box > *:not(:first-child) { 167 | margin-top: 1.5rem; 168 | } 169 | .chat-box > .user-message + .bot-message, 170 | .chat-box > .bot-message + .user-message { 171 | margin-top: 2rem; 172 | } 173 | 174 | /* Source link redesign */ 175 | .source-links a { 176 | background-color: var(--gray-600); 177 | color: var(--mongodb-green-500); 178 | padding: 0.25rem 0.75rem; 179 | border-radius: 9999px; 180 | font-weight: 500; 181 | transition: all 0.2s ease; 182 | display: inline-flex; 183 | align-items: center; 184 | gap: 0.25rem; 185 | } 186 | .source-links a:hover { 187 | background-color: var(--mongodb-green-600); 188 | color: var(--gray-900); 189 | transform: translateY(-2px); 190 | } 191 | .source-links a:active { 192 | transform: translateY(0); 193 | } 194 | 195 | /* Chunk Card Styles */ 196 | .chunk-card { 197 | background-color: var(--gray-800); 198 | border: 1px solid var(--gray-700); 199 | border-radius: 8px; 200 | overflow: hidden; 201 | transition: all 0.2s ease; 202 | } 203 | .chunk-card:hover { 204 | border-color: var(--mongodb-green-500); 205 | box-shadow: 0 0 15px rgba(0, 237, 100, 0.1); 206 | } 207 | .chunk-header { 208 | display: flex; 209 | justify-content: space-between; 210 | align-items: center; 211 | padding: 0.5rem 0.75rem; 212 | background-color: var(--gray-700); 213 | border-bottom: 1px solid var(--gray-600); 214 | } 215 | .chunk-title { 216 | font-size: 0.8rem; 217 | font-weight: bold; 218 | color: var(--mongodb-green-500); 219 | overflow: hidden; 220 | text-overflow: ellipsis; 221 | white-space: nowrap; 222 | } 223 | .chunk-content { 224 | padding: 0.75rem; 225 | font-size: 0.9rem; 226 | color: var(--gray-300); 227 | max-height: 150px; 228 | overflow-y: auto; 229 | } 230 | .chunk-list-container { 231 | display: flex; 232 | flex-direction: column; 233 | gap: 0.75rem; 234 | } 235 | 236 | /* Floating Logo Animation */ 237 | @keyframes gentle-float { 238 | 0% { transform: translateY(0); } 239 | 50% { transform: translateY(-8px) rotate(2deg); } 240 | 100% { transform: translateY(0); } 241 | } 242 | .logo-chunk-group { 243 | animation: gentle-float 10s 
ease-in-out infinite; 244 | } 245 | 246 | /* Control Panel Styles */ 247 | .control-panel-section { 248 | background-color: var(--gray-900); 249 | border: 1px solid var(--gray-700); 250 | border-radius: 12px; 251 | padding: 1rem; 252 | } 253 | .control-panel-title { 254 | font-size: 0.9rem; 255 | font-weight: 700; 256 | color: var(--mongodb-green-500); 257 | margin-bottom: 0.75rem; 258 | padding-bottom: 0.5rem; 259 | border-bottom: 1px solid var(--gray-700); 260 | } 261 | 262 | /* Source Browser Styles */ 263 | .sb-column-header { 264 | display: flex; 265 | justify-content: space-between; 266 | align-items: center; 267 | padding: 0 0.25rem 0.75rem 0.25rem; 268 | margin-bottom: 0.5rem; 269 | border-bottom: 1px solid var(--gray-700); 270 | font-size: 0.9rem; 271 | font-weight: 600; 272 | color: var(--gray-300); 273 | flex-shrink: 0; 274 | } 275 | 276 | .sb-count-badge { 277 | font-size: 0.75rem; 278 | font-weight: 500; 279 | background-color: var(--gray-700); 280 | color: var(--gray-300); 281 | padding: 2px 8px; 282 | border-radius: 9999px; 283 | flex-shrink: 0; 284 | margin-left: 0.5rem; 285 | } 286 | 287 | .source-item { 288 | display: flex; 289 | justify-content: space-between; 290 | align-items: center; 291 | width: 100%; 292 | padding: 0.5rem 0.75rem; 293 | border-radius: 6px; 294 | cursor: pointer; 295 | transition: background-color 0.2s, color 0.2s; 296 | text-align: left; 297 | border: none; 298 | background-color: transparent; 299 | color: var(--gray-300); 300 | font-size: 0.9rem; 301 | } 302 | .source-item:hover { 303 | background-color: var(--gray-700); 304 | } 305 | .source-item.active { 306 | background-color: var(--mongodb-green-500); 307 | color: var(--gray-900); 308 | font-weight: bold; 309 | } 310 | .source-item.active .sb-count-badge { 311 | background-color: var(--gray-800); 312 | color: var(--mongodb-green-500); 313 | } 314 | 315 | 316 | /* Style for the file drop zone when dragging a file over it */ 317 | .drop-zone-dragover { 318 | border-color: var(--mongodb-green-500) !important; 319 | background-color: rgba(0, 237, 100, 0.1); 320 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Interactive RAG Agent 2 | 3 | ![](irag-2025.png) 4 | 5 | 6 | ## The RAG Revolution: From Fragmented Mess to Unified Intelligence 7 | 8 | Large Language Models (LLMs) are transforming our world, but they have a fundamental limitation: they only know what they were trained on. To make them truly useful for specific, real-world tasks, we need to ground them in our own data. This is the promise of **Retrieval-Augmented Generation (RAG)**, a technique that gives an LLM access to a relevant knowledge base. 9 | 10 | However, many RAG systems are built on a shaky foundation. They're a fragmented mess of different databases and systems cobbled together, making them brittle, inefficient, and difficult to manage. 11 | 12 | But there's a better way. By combining the art of intelligent document **chunking** with a **unified data architecture**, we can build RAG agents that are not just powerful, but also flexible, manageable, and truly intelligent. This guide will show you how. 13 | 14 | ----- 15 | 16 | ## The Problem: The Frankenstein's Monster of RAG Architectures 17 | 18 | Let's be honest: a typical RAG setup often looks like a digital Frankenstein. 
Your raw documents live in one place, their vector embeddings are stored in a separate vector database, and the metadata that gives them context is tucked away somewhere else entirely. 🧟 19 | 20 | This siloed approach creates a nightmare for anyone trying to build, maintain, or improve the system: 21 | 22 | * **Painful Updates:** How do you update a single piece of information and ensure its vector and metadata are changed everywhere, atomically? 23 | * **Stifled Experimentation:** Want to test a new, better embedding model? Get ready to build an entirely new, parallel system and migrate all your data. 24 | * **Slow, Complex Queries:** Every question requires complex joins across different databases, adding latency and making the system a headache to scale. 25 | 26 | This fragmented architecture simply can’t keep up with the pace of modern AI. 27 | 28 | ----- 29 | 30 | ## The Unified Solution: A Single Source of Truth 31 | 32 | The key to building a smarter RAG system is to create a single source of truth using a flexible document model. Instead of scattering your data across multiple systems, every chunk of your knowledge is stored as a single, self-contained JSON document in a database like MongoDB. 33 | 34 | ```json 35 | { 36 | "_id": ObjectId("..."), 37 | "text": "MongoDB's document model stores data as BSON documents...", 38 | "metadata": { 39 | "source": "https://www.mongodb.com/docs/", 40 | "source_type": "url", 41 | "session_id": "product_faq" 42 | }, 43 | "embedding_openai": [0.123, 0.456, ...], 44 | "embedding_voyageai": [0.789, 0.101, ...] 45 | } 46 | ``` 47 | 48 | This elegant structure immediately solves our biggest problems and unlocks new capabilities: 49 | 50 | * **🧪 Experiment in Minutes, Not Months:** The schema-agnostic model lets you store vectors from multiple embedding models in the *same document*. You can easily A/B test a new model by simply adding a new field—no complex data migration required. 51 | * **🎯 Achieve Pinpoint Accuracy:** You can perform a vector search while simultaneously filtering on any metadata field. This lets you instantly scope a search to a specific user session or document type, dramatically increasing the relevance of your results. 52 | 53 | This unified approach streamlines the entire RAG pipeline, bringing your data, its context, and its vector representations together into one cohesive whole. 54 | 55 | ----- 56 | 57 | ## The Foundation: The Art of Intelligent Chunking 58 | 59 | ![](irag-chunk-mgmt.png) 60 | 61 | With our data model in place, we need to prepare the content. The performance of any RAG system hinges on a well-chunked knowledge base. Breaking a document into pieces sounds simple, but doing it *intelligently* is crucial. 62 | 63 | Using a tool like **LangChain's `RecursiveCharacterTextSplitter`** is a great start. It intelligently breaks down documents by trying to keep paragraphs and sentences whole, which is vital for preserving the semantic meaning of the text. 64 | 65 | You can control this process with two key "tuning knobs": 66 | 67 | * **Chunk Size:** A starting point of **1,000 characters** is a good balance. It's small enough for precise retrieval but large enough to contain meaningful context. 68 | * **Chunk Overlap:** An overlap of **150 characters** creates a contextual bridge between adjacent chunks. This ensures that an important idea isn't awkwardly split in two right at a boundary. 
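To make these two knobs concrete, here is a minimal sketch of the splitting step using LangChain's `RecursiveCharacterTextSplitter` with the values discussed above. The file name and session values are illustrative assumptions; only the splitter and its parameters come from this guide, and the resulting documents mirror the unified schema shown earlier (embedding fields would be added at ingestion time).

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter  # older versions: langchain.text_splitter

# The two tuning knobs discussed above.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # ~1,000 characters per chunk
    chunk_overlap=150,  # contextual bridge between adjacent chunks
)

raw_text = open("policy_doc.txt").read()  # hypothetical source document
chunks = splitter.split_text(raw_text)

# Each chunk becomes one self-contained document, matching the unified schema above.
docs = [
    {
        "text": chunk,
        "metadata": {
            "source": "policy_doc.txt",
            "source_type": "file",
            "session_id": "project_alpha",
        },
    }
    for chunk in chunks
]
print(f"{len(docs)} chunks ready for embedding and insertion")
```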
69 | 70 | ----- 71 | 72 | ## The Conversation: Tuning for Precision 73 | 74 | Once your knowledge is ingested, getting the best answers requires fine-grained control over the retrieval process. Think of it as a conversation with your data, and you have the dials to control the clarity. 75 | 76 | ### The Quality Bouncer: `min_rel_score` 77 | 78 | The **minimum relevance score** acts as a critical quality filter—like a bouncer at a club, it only lets in high-quality information. Vector search ranks results by similarity, assigning a score from 0 to 1. By setting a threshold (e.g., 0.80), you tell the agent to ignore any chunks that aren't a strong match for the query. 79 | 80 | This empowers your agent to confidently say, "I don't know," rather than trying to invent an answer from low-quality context. This is a hallmark of an intelligent system, preventing "garbage-in, garbage-out" scenarios. 81 | 82 | ### The Context Dial: `num_sources` (k) 83 | 84 | The **`num_sources`** parameter (often called 'k') is your context dial. It determines how many of the top-ranking chunks the agent retrieves to answer a question. 85 | 86 | * **For specific, factual questions,** you want a focused beam of light. A small `k` (e.g., 3) is ideal. 87 | * **For open-ended, brainstorming queries,** you need a floodlight. A larger `k` (e.g., 10) provides the broader context necessary for a comprehensive response. 88 | 89 | This simple dial allows you to perfectly balance the need for concise answers with comprehensive ones. 90 | 91 | ----- 92 | 93 | ## The Agent's Edge: A Living, Organized Knowledge Base 94 | 95 | A truly intelligent RAG agent doesn't just *read* its knowledge base—it helps *manage* it. Because each chunk is a unique document with its own `_id`, the agent can perform standard database operations. 96 | 97 | Imagine a user points out that a company policy has changed. The agent can use a tool to execute a command like this: 98 | 99 | `update_chunk(chunk_id='...', new_content='The new policy takes effect on Jan 1, 2026.')` 100 | 101 | This transforms the RAG system from a static library into a **living knowledge base** that can be corrected and updated in real time. 🧠 This crucial capability is often impossible in fragmented RAG applications. 102 | 103 | To manage this evolving knowledge, the agent uses **sessions**—distinct workspaces with their own isolated knowledge and chat history. This ensures that when you're working on "Project Alpha," you're only getting answers from the "Project Alpha" knowledge base, keeping your conversations clean and contextually relevant. 104 | 105 | ----- 106 | 107 | ## Conclusion 108 | 109 | By moving away from fragmented architectures and embracing a unified approach, you can build AI agents that are not only more powerful but also infinitely more manageable. MongoDB’s document model simplifies data management, intelligent chunking enhances retrieval quality, and tunable parameters give you the control to refine results. 110 | 111 | Most importantly, by treating each chunk as a self-contained, editable entity, your knowledge base can grow and evolve. This is the foundation for a truly dynamic and intelligent AI system, ready for the future. 112 | 113 | ----- 114 | 115 | ----- 116 | 117 | ## Appendix: Under the Hood of a Unified RAG System 118 | 119 | ### The Strategic Value of a Single Document 120 | 121 | So, why is keeping everything in one document so revolutionary for RAG? 
Let's revisit the hard questions posed by fragmented systems: 122 | 123 | * **How do you A/B test a new embedding model** without building an entirely new system and migrating all your data? 124 | * **How do you perform a similarity search that's also filtered by user metadata** (like `session_id`) without slow, expensive joins between databases? 125 | * **How do you update or delete a single chunk** and guarantee its vector and metadata are handled atomically? 126 | 127 | The unified document model solves these problems with elegance. A single ingestion process can generate embeddings from multiple models and store them right next to the text and its metadata. 128 | 129 | ```json 130 | { 131 | "text": "The quick brown fox...", 132 | "metadata": { 133 | "source": "example.txt", 134 | "session_id": "project_alpha" 135 | }, 136 | "embedding_openai": [0.01, 0.02, ...], 137 | "embedding_voyageai": [0.98, 0.97, ...] 138 | } 139 | ``` 140 | 141 | This structure provides immense flexibility and future-proofs your architecture. As better models emerge, you can adopt them without disruption. 142 | 143 | ### The Agent's Toolkit: Tools and Pipelines 144 | 145 | The agent interacts with this unified database using **tools**. In a framework like LangChain, a tool is a function the LLM can decide to call based on the user's query. 146 | 147 | Our `search_knowledge_base` tool is powered by a **MongoDB Aggregation Pipeline**, which is where the magic happens. 148 | 149 | ```python 150 | @tool 151 | def search_knowledge_base(query: str, embedding_model: str, num_sources: int = 3) -> str: 152 | """Query the knowledge base to find relevant chunks for `query`.""" 153 | 154 | # Select the correct vector field based on the user's choice 155 | model_config = EMBEDDING_CONFIG[embedding_model] 156 | query_vector = config.embedding_clients[embedding_model].embed_query(query) 157 | 158 | pipeline = [ 159 | # Stage 1: Perform the vector search and metadata filtering in one step 160 | { 161 | "$vectorSearch": { 162 | "index": model_config['index_name'], 163 | "path": model_config['vector_field'], # Dynamically use the right embedding 164 | "queryVector": query_vector, 165 | "limit": num_sources, 166 | "numCandidates": num_sources * 10, 167 | "filter": { 168 | # Only search within the current user's session 169 | "metadata.session_id": {"$eq": config.current_session} 170 | } 171 | } 172 | }, 173 | # Stage 2: Reshape the output for the LLM 174 | { 175 | "$project": { 176 | "score": {"$meta": "vectorSearchScore"}, 177 | "content": "$text", 178 | "source": "$metadata.source" 179 | } 180 | } 181 | ] 182 | results = list(config.collection.aggregate(pipeline)) 183 | return format_results(results) 184 | ``` 185 | 186 | This pipeline is where the power of the unified model becomes clear. The `$vectorSearch` stage efficiently finds the most semantically similar chunks while *simultaneously* applying a `filter` on the metadata. This is a highly optimized, single-database operation that is far more efficient than coordinating searches across separate systems. This architecture paves the way for even more advanced strategies, all within the same powerful pipeline. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /chunk.md: -------------------------------------------------------------------------------- 1 | ## Chunking: A Hidden Hero in the Rise of GenAI 2 | 3 | ## ![Alt text](https://cdn.stackoverflow.co/images/jo7n4k8s/production/ef172115fca9aa6b3b99eeb1c749acf9f8c183a0-6000x3150.png?w=1200&h=630&auto=format&dpr=2) 4 | 5 | The recent boom in Large Language Models (LLMs) has opened up a new world of possibilities for understanding and interacting with language. One of the most exciting applications is their ability to automatically summarize long documents, saving us valuable time and effort. However, effectively summarizing longer documents with LLMs still presents some challenges. This blog post dives into the often-overlooked but crucial role of "chunking" and its potential to unlock the full power of LLMs in document summarization, particularly within the context of the Retrieval-Augmented Generation (RAG) model, **powered by the innovative capabilities of MongoDB Atlas Vector Search.** 6 | 7 | **RAG and the Chunking Puzzle:** 8 | 9 | RAG takes a two-pronged approach to summarization, combining the strengths of both information retrieval and text generation. It first identifies relevant passages within the document based on a query and then uses an LLM to craft a concise and informative summary of those passages. However, the effectiveness of this process hinges heavily on how the document is divided into smaller units, known as "chunks." Chunks that are too large can overwhelm the LLM, leading to inaccurate or incomplete summaries. Conversely, chunks that are too small may not provide enough context for the LLM to understand the overall message of the document. 10 | 11 | **The Quest for the Optimal Chunk:** 12 | 13 | Researchers have been actively exploring various chunking strategies to optimize the performance of RAG. Here are some: 14 | 15 | * **Fixed-size chunks with overlap:** This method involves dividing the document into chunks of a predetermined size, ensuring sufficient context while minimizing information loss at chunk boundaries. By leveraging the $vectorSearch operator, we can now perform efficient Approximate Nearest Neighbor (ANN) searches within each chunk, ensuring we retrieve the most relevant passages for summarization. 
16 | * **Recursive chunking:** This strategy takes an iterative approach, starting with the entire document and then splitting each chunk into smaller and smaller pieces. This allows for fine-grained control over the level of detail and context presented to the LLM. MongoDB Atlas Vector Search's vector representation of document content empowers us to perform hierarchical chunking, efficiently identifying the most relevant sub-topics within each segment.
17 | * **Paragraph-based chunking:** This method utilizes natural paragraph breaks to define chunk boundaries, making it suitable for documents with well-defined paragraphs. However, it may not be ideal for texts with more unstructured content. Here, the $filter capabilities of MongoDB Atlas Vector Search come in handy, allowing us to filter chunks based on specific keywords or semantic similarity to ensure we focus on the most relevant parts of the document.
18 | * **Single-page chunks:** This simple approach uses entire pages as individual chunks. While efficient, it may not capture crucial details or effectively address the limitations of LLM processing capabilities. By leveraging the hybrid search capabilities of MongoDB Atlas Vector Search, we can combine traditional keyword search with vector similarity to achieve optimal chunk retrieval, even for single-page documents.
19 |
20 | **Other Strategies**
21 |
22 | **Parent Document Retrieval Strategies for RAG:**
23 |
24 | The effectiveness of RAG relies heavily on the initial retrieval of relevant passages from the "parent document." Here are some key strategies:
25 |
26 | * **Keyword matching:** This traditional approach involves matching keywords from the query to keywords within the document. While simple and efficient, it may not capture the full semantic meaning of the query or the document.
27 | * **Passage embedding and retrieval:** This strategy uses vector representations of both the query and the document passages. This allows for more precise retrieval based on semantic similarity, even if the exact keywords don't match. MongoDB Atlas Vector Search excels at this, enabling efficient and accurate retrieval of relevant passages using the $vectorSearch operator.
28 | * **Hybrid search:** This approach combines keyword matching with passage embedding and retrieval. This leverages the strengths of both methods, ensuring both high recall (finding all relevant passages) and high precision (finding only relevant passages).
29 |
30 | ```python
31 | agg_pipeline = [{
32 |     "$vectorSearch": {
33 |         "index": 'nested_search_index',
34 |         "path": "text_embedding",
35 |         "queryVector": query_vector,
36 |         "limit": k,
37 |         "numCandidates": k * multiplier,
38 |     },
39 | },
40 |
41 | {
42 |     "$match": {"sample_question": {"$exists": False}}
43 | },
44 | {
45 |     "$project": {"text_embedding": 0}
46 | },
47 | {
48 |     '$lookup': {"from": "hnsw_parent_retrieval_example",
49 |                 "localField": "parent_id",
50 |                 "foreignField": "_id",
51 |                 "as": 'parent_documents'
52 |     }},
53 | {'$unwind': {"path": "$parent_documents"}},
54 | {"$limit": k}
55 | ]
56 | ```
57 |
58 | This aggregation pipeline in MongoDB Atlas Vector Search retrieves relevant documents based on a query vector and performs further filtering and processing. Here's a breakdown of each stage:
59 |
60 | **Stage 1: $vectorSearch:**
61 |
62 | - **index:** Specifies the name of the vector search index used for retrieval.
63 | - **path:** Defines the path within each document where the text embedding vector is stored (assumed to be "text_embedding").
64 | - **queryVector:** The vector representation of the query used for semantic search.
65 | - **limit:** Maximum number of documents to retrieve (k).
66 | - **numCandidates:** Number of candidate documents to consider before filtering (k * multiplier). This helps ensure enough relevant documents are retrieved even after filtering.
67 |
68 | **Stage 2: $match:**
69 |
70 | - **"sample_question": {"$exists": False}:** This filters out documents that have a field named "sample_question", ensuring we only deal with documents relevant to the current task.
71 |
72 | **Stage 3: $project:**
73 |
74 | - **"text_embedding": 0:** Excludes the "text_embedding" field from the output documents, potentially reducing document size and improving efficiency.
75 |
76 | **Stage 4: $lookup:**
77 |
78 | - **"from": "hnsw_parent_retrieval_example":** Specifies the name of the collection containing parent documents.
79 | - **"localField": "parent_id"**: Identifies the field in the current document that stores the parent document ID.
80 | - **"foreignField": "_id"**: Identifies the field in the parent document collection that stores the document ID.
81 | - **"as": 'parent_documents'**: Defines the alias for the retrieved parent documents in the output.
82 |
83 | **Stage 5: $unwind:**
84 |
85 | - **{"path": "$parent_documents"}**: "Unwinds" the "parent_documents" array, creating a separate document for each parent document associated with the current document.
86 |
87 | **Stage 6: $limit:**
88 |
89 | - **"limit": k**: Limits the final output to the k most relevant documents, preserving the relevance ordering established by the $vectorSearch stage.
90 |
91 | Overall, this pipeline uses vector search to retrieve relevant documents based on a query vector, performs further filtering and exclusion, associates each document with its corresponding parent document, and finally returns the k most relevant documents.
92 |
93 | ## Beyond Retrieval: Unlocking Deeper Insights with Question-Driven Chunking and LLM Processing
94 |
95 | Imagine summarizing a news article about a groundbreaking scientific discovery. You've retrieved a relevant chunk, brimming with technical jargon and intricate concepts. To truly grasp the essence of this discovery and prepare the information for LLM-based summarization, a more proactive approach is needed. Here's where **question-driven chunking**, powered by MongoDB Atlas Vector Search, comes into play.
96 |
97 | Instead of passively processing the entire chunk, we can ask a targeted question like: "What are the key implications of this new discovery for the field of medicine?" This simple act transforms the process from passive consumption to active exploration, focusing the LLM's attention on the most relevant information.
98 |
99 | **Leveraging the Power of Vector Search:**
100 |
101 | Through the magic of MongoDB Atlas Vector Search, both the question and the chunk are embedded into a "semantic landscape." This allows us to search for the sentence within the chunk that best aligns with the question's meaning, regardless of exact word matches. This targeted approach unlocks several key benefits:
102 |
103 | * **Enhanced Understanding:** By focusing solely on the relevant answer sentence, the LLM receives the most crucial information, leading to a more accurate and insightful summary.
104 | * **Reduced Workload:** The LLM doesn't have to sift through the entire chunk, minimizing processing time and computational resources.
105 | * **Unveiling Deeper Connections:** Asking questions allows us to uncover hidden insights within the information, generating summaries that go beyond just factual details. 106 | 107 | **The Power of LLM Processing:** 108 | 109 | Once the answer sentence is extracted through MongoDB Atlas Vector Search, the LLM can be used to further refine and summarize the extracted information. This process involves: 110 | 111 | * **Contextualization:** Providing the LLM with additional context, such as the original question, relevant sentences from the chunk, and the desired length and key points for the summary. 112 | * **LLM Processing:** The LLM then leverages its capabilities to extract key information, rephrase the answer sentence for clarity and conciseness, and ultimately generate a concise and informative summary. 113 | * **Integration:** This LLM-generated summary can be integrated into a larger summarization system that combines summaries from multiple chunks, performs fact-checking, and offers different summarization styles for diverse audiences and purposes. 114 | 115 | **A New Frontier for Text Analysis:** 116 | 117 | By combining the power of question-driven chunking with LLM processing, we unlock a new level of sophistication in text analysis and summarization. This approach allows us to: 118 | 119 | * Extract the most relevant and insightful information from complex documents. 120 | * Generate summaries that are not only factually accurate but also tailored to specific needs and goals. 121 | * Open up exciting possibilities for utilizing LLM technology for a wide range of applications. 122 | 123 | This is not just about summarizing text; it's about unlocking deeper understanding and transforming information into meaningful insights. By embracing a question-driven approach and leveraging the power of LLM processing, we open a new chapter in the field of text analysis and summarization, paving the way for a more insightful and impactful future. 124 | 125 | **Beyond Chunking: LLM-powered Enhancements:** 126 | 127 | Several innovative approaches leverage LLMs to further improve chunking effectiveness, all powered by MongoDB Atlas Vector Search: 128 | 129 | * **LLM pre-summarization:** This strategy involves using an LLM to pre-summarize the content of each chunk before feeding it to the main RAG model. This significantly reduces the workload for the LLM and can lead to more accurate summarization. By storing pre-summarized content as vectors within MongoDB Atlas Vector Search, we can further enhance query efficiency and enable efficient retrieval of relevant chunks. 130 | * **Static text generation from structured data:** This technique leverages LLMs to generate a static textual representation of the information within each chunk. This can be particularly useful for summarizing documents containing complex data structures, such as tables or figures. MongoDB Atlas Vector Search allows us to store and search these generated texts alongside the original data, enabling a more comprehensive understanding of the document's content. 131 | * **Exchange boundary chunking:** This method is specifically designed for dialogue transcripts and involves splitting the transcript based on speaker changes. This allows the LLM to capture the flow of conversation and generate more accurate summaries. In conjunction with MongoDB Atlas Vector Search, we can perform speaker identification and topic segmentation, further optimizing chunk retrieval for dialogue-based content. 
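Before moving on to concrete examples, here is a minimal sketch of the question-driven chunking idea described earlier: embed the question and each sentence of a retrieved chunk, then keep the sentence that scores highest. The period-based sentence splitting and the generic `embed` callable are simplifying assumptions for illustration; in practice you would pass your embedding client's `embed_query` method and use a proper sentence splitter.

```python
from math import sqrt

def cosine(a, b):
    # Cosine similarity between two equal-length vectors.
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (sqrt(sum(x * x for x in a)) * sqrt(sum(y * y for y in b)))

def best_answer_sentence(question: str, chunk: str, embed) -> str:
    # Naive sentence split on periods; a real implementation would use a proper splitter.
    sentences = [s.strip() for s in chunk.split(".") if s.strip()]
    question_vector = embed(question)
    # Score every sentence against the question and keep the best match.
    scored = [(cosine(question_vector, embed(sentence)), sentence) for sentence in sentences]
    return max(scored)[1]
```

The winning sentence, together with the original question, is what gets handed to the LLM for the contextualization and summarization steps described above.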
132 | 133 | ## Examples 134 | 135 | **Example 1: Fixed-size chunks with overlap and the $vectorSearch operator:** 136 | 137 | Imagine you're summarizing a 10-page research paper using the RAG model. Utilizing MongoDB Atlas Vector Search, you can: 138 | 139 | 1. **Divide the document into fixed-size chunks**, say 1000 words each, with a 500-word overlap. This ensures sufficient context while minimizing information loss at chunk boundaries. 140 | 2. **Within each chunk, leverage the $vectorSearch operator to perform efficient ANN searches**. This allows you to identify the most relevant sentences within each chunk, based on your query or specific keywords. 141 | 3. **Feed these retrieved sentences to the LLM for summarization**, ensuring that the final summary focuses on the most crucial aspects of the document. 142 | 143 | **Example 2: Recursive chunking and vector representation:** 144 | 145 | Consider summarizing a legal document with complex nested structures. Using MongoDB Atlas Vector Search, you can: 146 | 147 | 1. **Start by dividing the document into its main sections**. 148 | 2. **For each section, utilize the vector representation of its content to identify sub-topics**. 149 | 3. **Recursively apply this process**, further dividing each sub-topic into smaller and more focused segments. 150 | 4. **This hierarchical chunking approach, powered by vector similarity, ensures that the LLM receives relevant and contextually rich information for summarization.** 151 | 152 | **Example 3: Paragraph-based chunking with $filter:** 153 | 154 | You want to summarize an online news article. Utilizing MongoDB Atlas Vector Search, you can: 155 | 156 | 1. **Divide the document into natural paragraph breaks**. 157 | 2. **Apply the $filter operator to filter chunks based on specific keywords** related to your query or area of interest. 158 | 3. **This ensures that the LLM focuses solely on the most relevant sections of the article**, generating a concise and informative summary. 159 | 160 | **Example 4: Single-page chunks and hybrid search:** 161 | 162 | You need to summarize a product manual with minimal text but lots of diagrams and figures. Using MongoDB Atlas Vector Search, you can: 163 | 164 | 1. **Treat each page as a single chunk**. 165 | 2. **Employ hybrid search capabilities**, combining traditional keyword search with vector similarity. 166 | 3. **This allows you to retrieve relevant chunks based on both textual content and visual information embedded within the diagrams and figures**. 167 | 4. **The LLM can then process these retrieved chunks to generate a comprehensive and accurate summary of the entire product manual.** 168 | 169 | **Example 5: LLM pre-summarization and vector storage:** 170 | 171 | Imagine you have a large corpus of scientific articles that you need to summarize regularly. With MongoDB Atlas Vector Search, you can: 172 | 173 | 1. **Pre-summarize each article using an LLM**. 174 | 2. **Store these pre-summarized texts as vectors within MongoDB Atlas Vector Search**. 175 | 3. **This allows for efficient query processing and retrieval of relevant summaries**, significantly reducing the workload on the main LLM. 176 | 4. **When a new query arrives, you can first search for pre-existing summaries based on vector similarity**. This can potentially provide instant results for common queries, saving valuable time and computational resources. 
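As a rough illustration of Example 1's first step, the sketch below builds fixed-size word chunks with overlap. The 1,000-word size and 500-word overlap mirror the numbers in the example; the function itself is illustrative, and the actual ingestion code in this repository splits by characters rather than words.

```python
def fixed_size_chunks(text: str, size: int = 1000, overlap: int = 500) -> list:
    # Fixed-size word chunks with overlap (Example 1, step 1). Illustrative only;
    # the repository's ingestion uses a character-based splitter instead.
    assert size > overlap, "chunk size must be larger than the overlap"
    words = text.split()
    step = size - overlap
    starts = range(0, max(len(words) - overlap, 1), step)
    return [" ".join(words[i:i + size]) for i in starts]

# Each chunk (and later the query) is embedded, and $vectorSearch retrieves the
# best-matching chunks to hand to the LLM (steps 2 and 3 of Example 1).
```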
177 | 178 | These are just a few examples of how MongoDB Atlas Vector Search can be used to enhance the effectiveness of chunking in LLM-based summarization tasks. By leveraging its powerful search and storage capabilities, researchers and developers can unlock the full potential of LLMs and achieve even better performance in document understanding and summarization. 179 | 180 | 181 | **The Future of Chunking: Unlocking the Full Potential of LLMs:** 182 | 183 | Chunking experimentation, empowered by MongoDB Atlas Vector Search, is an exciting field with the potential to revolutionize the way LLMs approach document summarization. By exploring and optimizing different chunking strategies, researchers are paving the way for LLMs to generate informative, accurate, and concise summaries of even the most complex documents. 184 | -------------------------------------------------------------------------------- /rag/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Interactive RAG Agent // MongoDB Edition 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
(Markup omitted: the page titled "Interactive RAG Agent // MongoDB Edition" lays out the chat interface and control panel. Visible UI text includes the "Interactive RAG Agent" header and "MongoDB Themed Data Chunks" label; the welcome message "Welcome! Use the Control Panel on the right to manage sessions, add data, and fine-tune retrieval settings."; the hint "Preview the RAG context for your query before sending"; a Session Management panel ("Each session is a distinct workspace with its own chat history and knowledge base."); a Knowledge Base Tools panel; and a Retrieval Settings panel with a minimum relevance score control ("Minimum relevance score (0.0 to 1.0). Chunks below this are discarded.", default 0.00) and a max chunk length control ("Max characters per retrieved chunk. Truncates longer chunks.", default 2000).)
384 | 385 | 402 | 403 | 446 | 447 | 448 | -------------------------------------------------------------------------------- /blog.md: -------------------------------------------------------------------------------- 1 | # Interactive RAG with MongoDB Atlas + Function Calling API 2 | ## Introduction: Unveiling the Power of Interactive Knowledge Discovery 3 | 4 | Imagine yourself as a detective investigating a complex case. Traditional retrieval-augmented generation (RAG) acts as your static assistant, meticulously sifting through mountains of evidence based on a pre-defined strategy. While helpful, this approach lacks the flexibility needed for today's ever-changing digital landscape. 5 | 6 | Enter interactive RAG – the next generation of information access. It empowers users to become active knowledge investigators by: 7 | 8 | * **Dynamically adjusting retrieval strategies:** Tailor the search to your specific needs by fine-tuning parameters like the number of sources, chunk size, and retrieval algorithms. 9 | * **Staying ahead of the curve:** As new information emerges, readily incorporate it into your retrieval strategy to stay up-to-date and relevant. 10 | * **Enhancing LLM performance:** Optimize the LLM's workload by dynamically adjusting the information flow, leading to faster and more accurate analysis. 11 | 12 | Before you continue, make sure you understand the basics of: 13 | 14 | - [LLMs](https://www.mongodb.com/basics/large-language-models). 15 | - [ RAG](https://www.mongodb.com/basics/retrieval-augmented-generation). 16 | - [Using a vector database](https://www.mongodb.com/basics/vector-databases). 17 | 18 | ## ![Retrieval Augmented Generation - Diagram 1](./images/RAG-chunks.png) 19 | (_image from [Session 7: RAG Evaluation with RAGAS and How to Improve Retrieval](https://www.youtube.com/watch?v=mEv-2Xnb_Wk))_ 20 | 21 | ## Optimizing your retrieval strategy: static vs. interactive RAG 22 | 23 | Choosing between static and interactive retrieval-augmented generation approaches is crucial for optimizing your application's retrieval strategy. Each approach offers unique advantages and disadvantages, tailored to specific use cases: 24 | 25 | **Static RAG:** A static RAG approach is pre-trained on a fixed knowledge base, meaning the information it can access and utilize is predetermined and unchanging. This allows for faster inference times and lower computational costs, making it ideal for applications requiring real-time responses, such as chatbots and virtual assistants. 26 | 27 | **Pros:** 28 | 29 | * **Faster response:** Pre-loaded knowledge bases enable rapid inference, ideal for real-time applications like chatbots and virtual assistants. 30 | * **Lower cost:** Static RAG requires fewer resources for training and maintenance, making it suitable for resource-constrained environments. 31 | * **Controlled content:** Developers have complete control over the model's knowledge base, ensuring targeted and curated responses in sensitive applications. 32 | * **Consistent results:** Static RAG provides stable outputs even when underlying data changes, ensuring reliability in data-intensive scenarios. 33 | 34 | **Cons:** 35 | 36 | * **Limited knowledge:** Static RAG is confined to its pre-loaded knowledge, limiting its versatility compared to interactive RAG accessing external data. 37 | * **Outdated information:** Static knowledge bases can become outdated, leading to inaccurate or irrelevant responses if not frequently updated. 
38 | * **Less adaptable:** Static RAG can struggle to adapt to changing user needs and preferences, limiting its ability to provide personalized or context-aware responses. 39 | 40 | **Interactive RAG:** An interactive RAG approach is trained on a dynamic knowledge base, allowing it to access and process real-time information from external sources such as online databases and APIs. This enables it to provide up-to-date and relevant responses, making it suitable for applications requiring access to constantly changing data. 41 | 42 | **Pros:** 43 | 44 | * **Up-to-date information:** Interactive RAG can access and process real-time external information, ensuring current and relevant responses, which is particularly valuable for applications requiring access to frequently changing data. 45 | * **Greater flexibility:** Interactive RAG can adapt to user needs and preferences by incorporating feedback and interactions into their responses, enabling personalized and context-aware experiences. 46 | * **Vast knowledge base:** Access to external information provides an almost limitless knowledge pool, allowing interactive RAG to address a wider range of queries and deliver comprehensive and informative responses. 47 | 48 | **Cons:** 49 | 50 | * **Slower response:** Processing external information increases inference time, potentially hindering real-time applications. 51 | * **Higher cost:** Interactive RAG requires more computational resources, making it potentially unsuitable for resource-constrained environments. 52 | * **Bias risk:** External information sources may contain biases or inaccuracies, leading to biased or misleading responses if not carefully mitigated. 53 | * **Security concerns:** Accessing external sources introduces potential data security risks, requiring robust security measures to protect sensitive information. 54 | 55 | ### Choosing the right approach 56 | 57 | While this tutorial focuses specifically on interactive RAG, the optimal approach depends on your application's specific needs and constraints. Consider: 58 | 59 | * **Data size and update frequency:** Static models are suitable for static or infrequently changing data, while interactive RAG is necessary for frequently changing data. 60 | * **Real-time requirements:** Choose static RAG for applications requiring fast response times. For less critical applications, interactive RAG may be preferred. 61 | * **Computational resources:** Evaluate your available resources when choosing between static and interactive approaches. 62 | * **Data privacy and security:** Ensure your chosen approach adheres to all relevant data privacy and security regulations. 63 | 64 | 65 | ## Chunking: a hidden hero in the rise of GenAI 66 | 67 | Now, let's put our detective hat back on. If you have a mountain of evidence available for a particular case, you wouldn't try to analyze every piece of evidence at once, right? You'd break it down into smaller, more manageable pieces — documents, witness statements, physical objects — and examine each one carefully. In the world of large language models, this process of breaking down information is called _chunking_, and it plays a crucial role in unlocking the full potential of retrieval-augmented generation. 68 | 69 | Just like a detective, an LLM can't process a mountain of information all at once. Chunking helps it break down text into smaller, more digestible pieces called _chunks_. Think of these chunks as bite-sized pieces of knowledge that the LLM can easily analyze and understand. 
This allows the LLM to focus on specific sections of the text, extract relevant information, and generate more accurate and insightful responses. 70 | 71 | However, the size of each chunk isn't just about convenience for the LLM; it also significantly impacts the _retrieval vector relevance score_, a key metric in evaluating the effectiveness of chunking strategies. The process involves converting text to vectors, measuring the distance between them, utilizing ANN/KNN algorithms, and calculating a score for the generated vectors. 72 | 73 | Here is an example: Imagine asking "What is a mango?" and the LLM dives into its knowledge base, encountering these chunks: 74 | 75 | **High scores:** 76 | 77 | * **Chunk:** "Mango is a tropical stone fruit with a sweet, juicy flesh and a single pit." (Score: 0.98) 78 | * **Chunk:** "In India, mangoes are revered as the 'King of Fruits' and hold cultural significance." (Score: 0.92) 79 | * **Chunk:** "The mango season brings joy and delicious treats like mango lassi and mango ice cream." (Score: 0.85) 80 | 81 | These chunks directly address the question, providing relevant information about the fruit's characteristics, cultural importance, and culinary uses. High scores reflect their direct contribution to answering your query. 82 | 83 | **Low scores:** 84 | 85 | * **Chunk:** "Volcanoes spew molten lava and ash, causing destruction and reshaping landscapes." (Score: 0.21) 86 | * **Chunk:** "The stock market fluctuates wildly, driven by economic factors and investor sentiment." (Score: 0.42) 87 | * **Chunk:** "Mitochondria, the 'powerhouses of the cell,' generate energy for cellular processes." (Score: 0.55) 88 | 89 | These chunks, despite containing interesting information, are completely unrelated to mangoes. They address entirely different topics, earning low scores due to their lack of relevance to the query. 90 | 91 | Check out [ChunkViz v0.1](https://www.chunkviz.com/) to get a feel for how chunk size (character length) breaks down text. 
92 | 93 | ![Chunk Visualization](./images/chunkviz-1.png) 94 | 95 | **Balancing detail and context:** 96 | 97 | The size of each chunk influences the retrieval vector relevance score in distinct ways: 98 | 99 | **Smaller chunk size:** 100 | 101 | * **Pros:** 102 | * Precise focus on specific details and nuances 103 | * Potentially higher relevance scores due to accurate information extraction 104 | * Increased sensitivity to subtle changes in meaning 105 | * **Cons:** 106 | * May sacrifice broader context and understanding of the overall message 107 | * Requires more computational resources to process numerous chunks 108 | * Increased risk of missing relevant information due to limited context 109 | 110 | **Larger chunk size:** 111 | 112 | * **Pros:** 113 | * Provides a richer context for comprehending the overall message 114 | * More efficient processing with fewer chunks to handle 115 | * Potentially higher relevance scores for related chunks due to broader context 116 | * **Cons:** 117 | * May overlook specific details and subtle shifts in meaning 118 | * Increased risk of including irrelevant information within a chunk, potentially lowering the relevance score 119 | 120 | **Examples in action:** 121 | 122 | **Smaller chunk size:** 123 | 124 | * **Example:** Analyzing specific clauses in a legal document to identify potential inconsistencies 125 | * **Benefit:** Increased precision in detecting subtle shifts in meaning and ensuring accurate retrieval of relevant information 126 | 127 | **Larger chunk size:** 128 | 129 | * **Example:** Summarizing a long document by extracting key ideas and information across various sections 130 | * **Benefit:** Improved context for comprehending the overall message and the relationships between different parts of the text 131 | 132 | **Considerations for optimal chunking:** 133 | 134 | Finding the ideal chunk size is a delicate balance between focusing on specific details and capturing the broader context. Several factors influence this: 135 | 136 | * **Task at hand:** For tasks like question-answering, smaller chunks might be preferred for pinpoint accuracy. In contrast, summarization tasks benefit from larger chunks for better context. 137 | * **Data type:** Different types of data might require different chunking approaches. For example, code might be chunked differently than a news article. 138 | * **Desired accuracy:** Smaller chunks can lead to higher precision, while larger chunks might offer better overall understanding. 139 | 140 | **Unlocking the future:** 141 | 142 | Effective chunking maximizes the retrieval vector relevance score, enabling LLMs to generate the most accurate and insightful responses possible. By understanding the impact of chunk size and other relevant factors, we can unleash the full potential of LLMs and unlock exciting opportunities for the future. In this tutorial, the chunk size we will be controlling interactively is the retrieval chunk. 143 | 144 | ## Interactive retrieval-augmented generation 145 | 146 | ## ![RAG Agent Architecture for this Tutorial](./images/rag-agent.png) 147 | 148 | In this tutorial, we will showcase an interactive RAG agent. An agent is a computer program or system designed to perceive its environment, make decisions, and achieve specific goals. 
The interactive RAG agent we will showcase supports the following actions: 149 | - answering questions 150 | - searching the web 151 | - reading web content (URLs) 152 | - listing all sources 153 | - removing sources 154 | - resetting messages 155 | - modifying rag strategy (num_sources, chunk_size, etc.) 156 | 157 | ## Taking control with interactive RAG 158 | 159 | While an optimized chunk size is crucial, interactive RAG goes a step further. It empowers users to dynamically adjust their RAG strategy in real-time, using the function calling API of large language models. This unlocks a new era of personalized information access and knowledge management. 160 | 161 | This interactive RAG tutorial leverages: 162 | 163 | * **Dynamic strategy adjustment:** Unlike traditional RAG approaches, users can fine-tune chunk size, the number of sources, and other parameters on the fly, tailoring the LLM's response to their specific needs. 164 | * **Function calling API integration:** Function calling API seamlessly integrates external tools and services with LLMs. This allows users to seamlessly incorporate their data sources and tools into their RAG workflow. 165 | 166 | **Benefits:** 167 | 168 | * Enhanced information retrieval and knowledge management 169 | * Improved accuracy and relevance of LLM responses 170 | * Flexible and versatile framework for building AI applications 171 | 172 | 173 | ## Ingesting content into your vector database 174 | 175 | ### Streamlining content ingestion with function calling 176 | 177 | While vector databases offer significant advantages for GenAI applications, the process of ingesting content can feel cumbersome. Fortunately, we can harness the power of function calling API to seamlessly add new content to the database, simplifying the workflow and ensuring continuous updates. 178 | 179 | ### Choosing the right home for your embeddings 180 | 181 | While various databases can store vector embeddings, each with unique strengths, [MongoDB Atlas](https://cloud.mongodb.com) stands out for GenAI applications. Imagine MongoDB as a delicious cake you can both bake and eat. Not only does it offer the familiar features of MongoDB, but it also lets you store and perform mathematical operations on your vector embeddings directly within the platform. This eliminates the need for separate tools and streamlines the entire process. 182 | 183 | By leveraging the combined power of function calling API and MongoDB Atlas, you can streamline your content ingestion process and unlock the full potential of vector embeddings for your GenAI applications. 184 | 185 | ![RAG architecture diagram with MongoDB Atlas](./images/mdb_diagram.png) 186 | 187 | ### Detailed breakdown 188 | 189 | 1. **Vector embeddings**: MongoDB Atlas provides the functionality to store vector embeddings at the core of your document. These embeddings are generated by converting text, video, or audio into vectors utilizing models such as [GPT4All](https://gpt4all.io/index.html), [OpenAI](https://openai.com/) or [Hugging Face](https://huggingface.co/). 190 | 191 | ```python 192 | # Chunk Ingest Strategy 193 | self.text_splitter = RecursiveCharacterTextSplitter( 194 | # Set a really small chunk size, just to show. 195 | chunk_size=4000, # THIS CHUNK SIZE IS FIXED - INGEST CHUNK SIZE DOES NOT CHANGE 196 | chunk_overlap=200, # CHUNK OVERLAP IS FIXED 197 | length_function=len, 198 | add_start_index=True, 199 | ) 200 | # load data from webpages using Playwright. 
One document will be created for each webpage 201 | # split the documents using a text splitter to create "chunks" 202 | loader = PlaywrightURLLoader(urls=urls, remove_selectors=["header", "footer"]) 203 | documents = loader.load_and_split(self.text_splitter) 204 | self.index.add_documents( 205 | documents 206 | ) 207 | ``` 208 | 209 | 2. **Vector index**: When employing vector search, it's necessary to [create a search index](https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/). This process entails setting up the vector path, aligning the dimensions with your chosen model, and selecting a vector function for searching the top K-nearest neighbors. 210 | ```python 211 | { 212 | "name": "", 213 | "type": "vectorSearch", 214 | "fields":[ 215 | { 216 | "type": "vector", 217 | "path": , 218 | "numDimensions": , 219 | "similarity": "euclidean | cosine | dotProduct" 220 | }, 221 | ... 222 | ] 223 | } 224 | ``` 225 | 3. **Chunk retrieval**: Once the vector embeddings are indexed, an aggregation pipeline can be created on your embedded vector data to execute queries and retrieve results. This is accomplished using the [$vectorSearch](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage) operator, a new aggregation stage in Atlas. 226 | 227 | ```python 228 | def recall(self, text, n_docs=2, min_rel_score=0.25, chunk_max_length=800,unique=True): 229 | #$vectorSearch 230 | print("recall=>"+str(text)) 231 | response = self.collection.aggregate([ 232 | { 233 | "$vectorSearch": { 234 | "index": "default", 235 | "queryVector": self.gpt4all_embd.embed_query(text), #GPT4AllEmbeddings() 236 | "path": "embedding", 237 | #"filter": {}, 238 | "limit": 15, #Number (of type int only) of documents to return in the results. Value can't exceed the value of numCandidates. 239 | "numCandidates": 50 #Number of nearest neighbors to use during the search. You can't specify a number less than the number of documents to return (limit). 240 | } 241 | }, 242 | { 243 | "$addFields": 244 | { 245 | "score": { 246 | "$meta": "vectorSearchScore" 247 | } 248 | } 249 | }, 250 | { 251 | "$match": { 252 | "score": { 253 | "$gte": min_rel_score 254 | } 255 | } 256 | },{"$project":{"score":1,"_id":0, "source":1, "text":1}}]) 257 | tmp_docs = [] 258 | str_response = [] 259 | for d in response: 260 | if len(tmp_docs) == n_docs: 261 | break 262 | if unique and d["source"] in tmp_docs: 263 | continue 264 | tmp_docs.append(d["source"]) 265 | str_response.append({"URL":d["source"],"content":d["text"][:chunk_max_length],"score":d["score"]}) 266 | kb_output = f"Knowledgebase Results[{len(tmp_docs)}]:\n```{str(str_response)}```\n## \n```SOURCES: "+str(tmp_docs)+"```\n\n" 267 | self.st.write(kb_output) 268 | return str(kb_output) 269 | ``` 270 | 271 | In this tutorial, we will mainly be focusing on the **CHUNK RETRIEVAL** strategy using the function calling API of LLMs and MongoDB Atlas as our **[data platform](https://www.mongodb.com/atlas)**. 272 | 273 | ## Key features of MongoDB Atlas 274 | MongoDB Atlas offers a robust vector search platform with several key features, including: 275 | 276 | 1. **$vectorSearch operator:** 277 | This powerful aggregation pipeline operator allows you to search for documents based on their vector embeddings. You can specify the index to search, the query vector, and the similarity metric to use. [$vectorSearch](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage) provides efficient and scalable search capabilities for vector data. 278 | 279 | 2. 
**Flexible filtering:** 280 | You can combine $vectorSearch with other aggregation pipeline operators like [$match](https://www.mongodb.com/docs/v7.0/reference/operator/aggregation/match/), [$sort](https://www.mongodb.com/docs/v7.0/reference/operator/aggregation/sort/), and [$limit](https://www.mongodb.com/docs/v7.0/reference/operator/aggregation/limit/) to filter and refine your search results. This allows you to find the most relevant documents based on both their vector embeddings and other criteria. 281 | 282 | 3. **Support for various similarity metrics:** 283 | MongoDB Atlas supports different similarity metrics like [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) and [euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance), allowing you to choose the best measure for your specific data and task. 284 | 285 | 4. **High performance:** 286 | The vector search engine in MongoDB Atlas is optimized for large datasets and high query volumes, ensuring efficient and responsive search experiences. 287 | 288 | 5. **Scalability:** 289 | MongoDB Atlas scales seamlessly to meet your growing needs, allowing you to handle increasing data volumes and query workloads effectively. 290 | 291 | **Additionally, MongoDB Atlas offers several features relevant to its platform capabilities:** 292 | 293 | * **Global availability:** 294 | Your data is stored in multiple data centers around the world, ensuring high availability and disaster recovery. 295 | * **Security:** 296 | MongoDB Atlas provides robust security features, including encryption at rest and in transit, access control, and data audit logging. 297 | * **Monitoring and alerting:** 298 | MongoDB Atlas provides comprehensive monitoring and alerting features to help you track your cluster's performance and identify potential issues. 299 | * **Developer tools:** 300 | MongoDB Atlas offers various developer tools and APIs to simplify development and integration with your applications. 301 | 302 | ## OpenAI function calling: 303 | OpenAI's function calling is a powerful capability that enables users to seamlessly interact with OpenAI models, such as GPT-3.5, through programmable commands. This functionality allows developers and enthusiasts to harness the language model's vast knowledge and natural language understanding by incorporating it directly into their applications or scripts. Through function calling, users can make specific requests to the model, providing input parameters and receiving tailored responses. This not only facilitates more precise and targeted interactions but also opens up a world of possibilities for creating dynamic, context-aware applications that leverage the extensive linguistic capabilities of OpenAI's models. Whether for content generation, language translation, or problem-solving, OpenAI function calling offers a flexible and efficient way to integrate cutting-edge language processing into various domains. 304 | 305 | ## Key features of OpenAI function calling: 306 | - Function calling allows you to connect large language models to external tools. 307 | - The [Chat Completions API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) generates JSON that can be used to call functions in your code. 308 | - The latest models have been trained to detect when a function should be called and respond with JSON that adheres to the function signature. 309 | - Building user confirmation flows is recommended before taking actions that impact the world on behalf of users. 
310 | - Function calling can be used to create assistants that answer questions by calling external APIs, convert natural language into API calls, and extract structured data from text. 311 | - The basic sequence of steps for function calling involves calling the model, parsing the JSON response, calling the function with the provided arguments, and summarizing the results back to the user. 312 | - Function calling is supported by specific model versions, including [GPT-4](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and [GPT-3.5-turbo](https://platform.openai.com/docs/models/gpt-3-5). 313 | - Parallel function calling allows multiple function calls to be performed together, reducing round-trips with the API. 314 | - Tokens are used to inject functions into the system message and count against the model's context limit and billing. 315 | 316 | ![Function Calling Diagram - Simple](./images/function_calling.png) 317 | 318 | Read more at ThinhDA. 319 | 320 | ## Function calling API basics: actions 321 | 322 | Actions are functions that an agent can invoke. There are two important design considerations around actions: 323 | 324 | * Giving the agent access to the right actions 325 | * Describing the actions in a way that is most helpful to the agent 326 | 327 | ## Crafting actions for effective agents 328 | 329 | **Actions are the lifeblood of an agent's decision-making.** They define the options available to the agent and shape its interactions with the environment. Consequently, designing effective actions is crucial for building successful agents. 330 | 331 | Two key considerations guide this design process: 332 | 333 | 1. **Access to relevant actions:** Ensure the agent has access to actions necessary to achieve its objectives. Omitting critical actions limits the agent's capabilities and hinders its performance. 334 | 2. **Action description clarity:** Describe actions in a way that is informative and unambiguous for the agent. Vague or incomplete descriptions can lead to misinterpretations and suboptimal decisions. 335 | 336 | By carefully designing actions that are both accessible and well-defined, you equip your agent with the tools and knowledge necessary to navigate its environment and achieve its objectives. 337 | 338 | Further considerations: 339 | 340 | * **Granularity of actions:** Should actions be high-level or low-level? High-level actions offer greater flexibility but require more decision-making, while low-level actions offer more control but limit adaptability. 341 | * **Action preconditions and effects:** Clearly define the conditions under which an action can be taken and its potential consequences. This helps the agent understand the implications of its choices. 342 | 343 | 344 | If you don't give the agent the right actions and describe them in an effective way, you won’t be able to build a working agent. 345 | 346 | ![LangChain Tools Diagram](./images/llm_agent.png) 347 | (_Credit to blog post: [Make Langchain Agent Actually Work With Local LLMs (Vicuna, WizardLM)](https://betterprogramming.pub/make-langchain-agent-actually-works-with-local-llms-vicuna-wizardlm-etc-da42b6b1a97)_) 348 | 349 | An LLM is then called, resulting in either a response to the user or action(s) to be taken. If it is determined that a response is required, then that is passed to the user, and that cycle is finished. If it is determined that an action is required, that action is then taken, and an observation (action result) is made. 
That action and corresponding observation are added back to the prompt (we call this an “agent scratchpad”), and the loop resets — i.e., the LLM is called again (with the updated agent scratchpad). 350 | 351 | ## Getting started 352 | 353 | Clone the demo Github repository. 354 | ```bash 355 | git clone git@github.com:ranfysvalle02/Interactive-RAG.git 356 | ``` 357 | 358 | Create a new Python environment. 359 | ```bash 360 | python3 -m venv env 361 | ``` 362 | 363 | Activate the new Python environment. 364 | ```bash 365 | source env/bin/activate 366 | ``` 367 | 368 | Install the requirements. 369 | ```bash 370 | pip3 install -r requirements.txt 371 | ``` 372 | Set the parameters in [params.py](rag/params.py): 373 | ```bash 374 | # MongoDB 375 | MONGODB_URI = "" 376 | DATABASE_NAME = "genai" 377 | COLLECTION_NAME = "rag" 378 | 379 | # If using OpenAI 380 | OPENAI_API_KEY = "" 381 | 382 | # If using Azure OpenAI 383 | #OPENAI_TYPE = "azure" 384 | #OPENAI_API_VERSION = "2023-10-01-preview" 385 | #OPENAI_AZURE_ENDPOINT = "https://.openai.azure.com/" 386 | #OPENAI_AZURE_DEPLOYMENT = "" 387 | 388 | ``` 389 | Create a Search index with the following definition: 390 | ```JSON 391 | { 392 | "type": "vectorSearch", 393 | "fields": [ 394 | { 395 | "numDimensions": 384, 396 | "path": "embedding", 397 | "similarity": "cosine", 398 | "type": "vector" 399 | } 400 | ] 401 | } 402 | ``` 403 | 404 | Set the environment. 405 | ```bash 406 | export OPENAI_API_KEY= 407 | ``` 408 | 409 | To run the RAG application: 410 | 411 | ```bash 412 | env/bin/streamlit run rag/app.py 413 | ``` 414 | Log information generated by the application will be appended to app.log. 415 | 416 | ## Usage 417 | This bot supports the following actions: answering questions, searching the web, reading URLs, removing sources, listing all sources, viewing messages, and resetting messages. 418 | 419 | It also supports an action called iRAG that lets you dynamically control your agent's RAG strategy. 420 | 421 | Ex: "set RAG config to 3 sources and chunk size 1250" => New RAG config:{'num_sources': 3, 'source_chunk_size': 1250, 'min_rel_score': 0, 'unique': True}. 422 | 423 | If the bot is unable to provide an answer to the question from data stored in the Atlas Vector store and your RAG strategy (number of sources, chunk size, min_rel_score, etc), it will initiate a web search to find relevant information. You can then instruct the bot to read and learn from those results. 424 | 425 | 426 | ## Demo 427 | 428 | Let's start by asking our agent a question — in this case, "What is a mango?" The first thing that will happen is it will try to "recall" any relevant information using vector embedding similarity. It will then formulate a response with the content it "recalled" or will perform a web search. Since our knowledge base is currently empty, we need to add some sources before it can formulate a response. 429 | 430 | ![DEMO - Ask a Question](./images/ask_question.png) 431 | 432 | Since the bot is unable to provide an answer using the content in the vector database, it initiated a Google search to find relevant information. We can now tell it which sources it should "learn." In this case, we'll tell it to learn the first two sources from the search results. 433 | 434 | 435 | 436 | ![DEMO - Add a source](./images/add_sources.png) 437 | 438 | ## Change RAG strategy 439 | 440 | Next, let's modify the RAG strategy! Let's make it only use one source and have it use a small chunk size of 500 characters. 
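Under the hood, that request is just another tool call: the LLM maps the natural-language instruction onto a function that mutates the agent's RAG configuration. Below is a rough sketch of such an action; the `iRAG` name and field names come from the Usage section above, but the defaults and the plain-dict implementation are illustrative assumptions, not the repository's exact code.

```python
# Illustrative sketch of an agent action that updates the RAG strategy.
# Field names mirror the config shown in the Usage section; defaults are placeholders.
RAG_CONFIG = {"num_sources": 2, "source_chunk_size": 1000,
              "min_rel_score": 0.00, "unique": True}

def iRAG(num_sources=None, source_chunk_size=None,
         min_rel_score=None, unique=None) -> str:
    """Update only the fields the user asked to change and report the new config."""
    requested = {"num_sources": num_sources,
                 "source_chunk_size": source_chunk_size,
                 "min_rel_score": min_rel_score,
                 "unique": unique}
    for key, value in requested.items():
        if value is not None:
            RAG_CONFIG[key] = value
    return f"New RAG config: {RAG_CONFIG}"

# e.g. "use one source and a chunk size of 500" becomes:
# iRAG(num_sources=1, source_chunk_size=500)
```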
441 |
442 | ![DEMO - Change RAG strategy part 1](./images/mod_rag.png)
443 |
444 | Notice that though it was able to retrieve a chunk with a fairly high relevance score, it could not generate a response: at 500 characters, the chunk simply did not contain enough relevant content to formulate an answer. Since it could not answer with such a small chunk, it performed a web search on the user's behalf.
445 |
446 | Let's see what happens if we increase the chunk size to 3,000 characters instead of 500.
447 |
448 | ![DEMO - Change RAG strategy part 2](./images/mod_rag-2.png)
449 |
450 | Now, with a larger chunk size, it was able to accurately formulate the response using the knowledge from the vector database!
451 |
452 | ## List all sources
453 |
454 | Let's see what's available in the knowledge base of the agent by asking it, "What sources do you have in your knowledge base?"
455 |
456 | ![DEMO - List all sources](./images/list_sources.png)
457 |
458 | ## Remove a source of information
459 |
460 | If you want to remove a specific resource, you could do something like:
461 | ```
462 | USER: remove source 'https://www.oracle.com' from the knowledge base
463 | ```
464 |
465 | To remove all the sources in the collection, we could do something like:
466 |
467 | ![DEMO - Remove ALL sources](./images/forget.png)
468 |
469 | This demo has provided a glimpse into the inner workings of our AI agent, showcasing its ability to learn and respond to user queries in an interactive manner. We've witnessed how it seamlessly combines its internal knowledge base with real-time web search to deliver comprehensive and accurate information. The potential of this technology is vast, extending far beyond simple question-answering. None of this would be possible without the magic of the function calling API.
470 |
471 | ## Embracing the future of information access with interactive RAG
472 |
473 | This post has explored the exciting potential of interactive retrieval-augmented generation (RAG) with the powerful combination of MongoDB Atlas and the function calling API. We've delved into the crucial roles of chunking, embedding, and the retrieval vector relevance score in optimizing RAG performance, unlocking its true potential for information retrieval and knowledge management.
474 |
475 | Interactive RAG, powered by the combined forces of MongoDB Atlas and the function calling API, represents a significant leap forward in the realm of information retrieval and knowledge management. By enabling dynamic adjustment of the RAG strategy and seamless integration with external tools, it empowers users to harness the full potential of LLMs for a truly interactive and personalized experience.
476 |
477 | Intrigued by the possibilities? Explore the full source code for the interactive RAG application and unleash the power of RAG with MongoDB Atlas and the function calling API in your own projects!
478 |
479 | Together, let's unlock the transformative potential of this potent combination and forge a future where information is effortlessly accessible and knowledge is readily available to all.
480 |
481 | View the [full source code](https://github.com/ranfysvalle02/Interactive-RAG/) for the interactive RAG application using MongoDB Atlas and the function calling API.
482 | 483 | ### Additional MongoDB Resources 484 | 485 | - [RAG with Atlas Vector Search, LangChain, and OpenAI](https://www.mongodb.com/developer/products/atlas/rag-atlas-vector-search-langchain-openai/) 486 | - [Taking RAG to Production with the MongoDB Documentation AI Chatbot](https://www.mongodb.com/developer/products/atlas/taking-rag-to-production-documentation-ai-chatbot/) 487 | - [What is Artificial Intelligence (AI)?](https://www.mongodb.com/basics/what-is-artificial-intelligence) 488 | - [Unlock the Power of Semantic Search with MongoDB Atlas Vector Search](https://www.mongodb.com/basics/semantic-search) 489 | - [Machine Learning in Healthcare: 490 | Real-World Use Cases and What You Need to Get Started](https://www.mongodb.com/basics/machine-learning-healthcare) 491 | - [What is Generative AI? 492 | ](https://www.mongodb.com/basics/generative-ai) 493 | 494 | 495 | 496 | 497 | -------------------------------------------------------------------------------- /rag/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import urllib.parse 4 | import re 5 | import json 6 | import inspect 7 | import time 8 | import uuid 9 | from typing import List, Dict, Any, Optional 10 | import traceback 11 | import tempfile 12 | import io 13 | import concurrent.futures 14 | 15 | # --- 0. DEPENDENCIES --- 16 | # pip install pymongo flask openai python-dotenv flask-cors requests langchain 17 | # pip install langchain-openai langchain-mongodb ddgs docling langchain-voyageai voyageai 18 | # REMEMBER: if previously installed "duckduckgo-search", remove it: 19 | # pip uninstall duckduckgo-search -y 20 | # then "pip install ddgs". 21 | 22 | # --- Core Libraries --- 23 | import pymongo 24 | from flask import Flask, request, jsonify, render_template, render_template_string 25 | from flask_cors import CORS 26 | from pymongo.operations import SearchIndexModel 27 | from pymongo.errors import OperationFailure, ConnectionFailure 28 | from dotenv import load_dotenv 29 | import requests 30 | from bson.objectid import ObjectId 31 | from ddgs import DDGS 32 | from docling.document_converter import DocumentConverter 33 | 34 | # --- LangChain Imports --- 35 | from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings 36 | from langchain_voyageai import VoyageAIEmbeddings 37 | from langchain_mongodb import MongoDBAtlasVectorSearch 38 | from langchain.agents import AgentExecutor, create_openai_tools_agent 39 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 40 | from langchain_core.messages import HumanMessage, AIMessage 41 | from langchain_core.tools import tool 42 | from langchain.text_splitter import RecursiveCharacterTextSplitter 43 | 44 | # --- 1. 
CONFIG / ENV SETUP --- 45 | load_dotenv() 46 | 47 | DATABASE_NAME = "interactive_rag_db" 48 | COLLECTION_NAME = "knowledge_base_sessions" 49 | SESSION_FIELD = "session_id" 50 | 51 | # Embedding configuration for multiple models 52 | EMBEDDING_CONFIG = { 53 | "openai": { 54 | "vector_field": "embedding_openai", 55 | "index_name": "openai_vector_index", 56 | "dimensions": 1536 57 | }, 58 | "voyageai": { 59 | "vector_field": "embedding_voyageai", 60 | "index_name": "voyageai_vector_index", 61 | "dimensions": 1024 62 | } 63 | } 64 | 65 | # Logging setup 66 | logging.basicConfig( 67 | filename="rag_agent.log", 68 | filemode="a", 69 | format="%(asctime)s - %(levelname)s - %(message)s", 70 | level=logging.INFO 71 | ) 72 | logger = logging.getLogger(__name__) 73 | 74 | def print_log(message: str): 75 | print(message) 76 | logger.info(message) 77 | 78 | 79 | # --- 2. GLOBAL AGENT STATE --- 80 | class AgentConfig: 81 | def __init__(self): 82 | self.rag_config = { 83 | "num_sources": 3, 84 | "min_rel_score": 0.0, 85 | "max_chunk_length": 2000 86 | } 87 | self.embedding_clients = {} 88 | 89 | # Connect to MongoDB 90 | try: 91 | self.db_client = pymongo.MongoClient( 92 | os.getenv("MDB_URI"), 93 | serverSelectionTimeoutMS=10000 94 | ) 95 | self.db_client.admin.command('ping') 96 | print_log("[INFO] MongoDB connection successful.") 97 | except (ConnectionFailure, OperationFailure) as e: 98 | print_log(f"[FATAL] 🚨 MongoDB connection failed. Error: {e}") 99 | raise 100 | 101 | self.db = self.db_client[DATABASE_NAME] 102 | self.collection = self.db[COLLECTION_NAME] 103 | 104 | # In-memory chat logs 105 | self.chat_history = {} 106 | self.current_session = "default" 107 | 108 | # For storing last search result sources (optional) 109 | self.last_retrieved_sources = [] 110 | 111 | # Initialize embeddings 112 | print_log("--- 🧠 Initializing Embedding Clients ---") 113 | # 1) OpenAI 114 | self.embedding_clients["openai"] = AzureOpenAIEmbeddings( 115 | azure_deployment=os.getenv("EMBEDDING_DEPLOYMENT_NAME", "text-embedding-ada-002"), 116 | azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), 117 | api_key=os.getenv("AZURE_OPENAI_API_KEY") 118 | ) 119 | print_log("[INFO] OpenAI embedding client initialized.") 120 | 121 | # 2) VoyageAI (if VOYAGE_API_KEY is set) 122 | if os.getenv("VOYAGE_API_KEY"): 123 | try: 124 | self.embedding_clients["voyageai"] = VoyageAIEmbeddings( 125 | model="voyage-2", 126 | voyage_api_key=os.getenv("VOYAGE_API_KEY") 127 | ) 128 | print_log("[INFO] VoyageAI embedding client initialized.") 129 | except Exception as e: 130 | print_log(f"[WARN] ⚠️ VoyageAI initialization failed: {e}. Skipping.") 131 | else: 132 | print_log("[INFO] VOYAGE_API_KEY not found. VoyageAI embeddings not available.") 133 | 134 | print_log("------------------------------------") 135 | 136 | config = AgentConfig() 137 | 138 | 139 | # --- 3. 
BACKGROUND TASK SETUP --- 140 | executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) 141 | tasks = {} 142 | 143 | def run_ingestion_task( 144 | task_id: str, 145 | content: str, 146 | source: str, 147 | source_type: str, 148 | session_id: str, 149 | chunk_size: int, 150 | chunk_overlap: int 151 | ): 152 | """Handles chunking & embedding in a background thread.""" 153 | try: 154 | tasks[task_id] = {"status": "processing", "step": "Chunking content..."} 155 | print_log(f"[Task {task_id}] Chunking '{source}' with size {chunk_size} and overlap {chunk_overlap}...") 156 | 157 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 158 | chunks = text_splitter.split_text(content) 159 | if not chunks: 160 | raise ValueError("Could not split content into any chunks.") 161 | 162 | tasks[task_id] = {"status": "processing", "step": "Generating embeddings..."} 163 | print_log(f"[Task {task_id}] Generating embeddings for {len(chunks)} chunks...") 164 | all_embeddings = _embed_chunks_parallel(chunks) 165 | 166 | tasks[task_id] = {"status": "processing", "step": "Saving to knowledge base..."} 167 | print_log(f"[Task {task_id}] Saving {len(chunks)} chunks to the database...") 168 | 169 | to_insert = [] 170 | for i, chunk_text in enumerate(chunks): 171 | doc = { 172 | "text": chunk_text, 173 | "metadata": { 174 | "source": source, 175 | "source_type": source_type, 176 | SESSION_FIELD: session_id, 177 | "chunk_index": i 178 | } 179 | } 180 | for model_name, emb_list in all_embeddings.items(): 181 | if emb_list: 182 | vec_field = EMBEDDING_CONFIG[model_name]["vector_field"] 183 | doc[vec_field] = emb_list[i] 184 | to_insert.append(doc) 185 | 186 | config.collection.insert_many(to_insert) 187 | final_message = f"Successfully ingested {len(chunks)} chunks from source '{source}'." 188 | tasks[task_id] = {"status": "complete", "message": final_message} 189 | print_log(f"[Task {task_id}] {final_message}") 190 | 191 | except Exception as e: 192 | error_message = f"Ingestion failed: {str(e)}" 193 | print_log(f"[Task {task_id}] [ERROR] {error_message}\n{traceback.format_exc()}") 194 | tasks[task_id] = {"status": "failed", "message": error_message} 195 | 196 | 197 | # --- 4. LANGCHAIN SETUP --- 198 | CHAT_DEPLOYMENT_NAME = os.getenv("CHAT_DEPLOYMENT_NAME", "gpt-4o") 199 | print_log(f"--- 🧠 Initializing LLM ---\nChat Deployment: '{CHAT_DEPLOYMENT_NAME}'\n------------------------------------") 200 | 201 | llm = AzureChatOpenAI( 202 | azure_deployment=CHAT_DEPLOYMENT_NAME, 203 | api_version="2024-02-01", 204 | azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), 205 | api_key=os.getenv("AZURE_OPENAI_API_KEY"), 206 | temperature=0 207 | ) 208 | 209 | 210 | # --- 5. 
CORE FUNCTIONS --- 211 | def _embed_chunks_parallel(chunks: List[str]) -> Dict[str, List[List[float]]]: 212 | """Embed the text chunks in parallel for each available embedding model.""" 213 | embeddings = {} 214 | with concurrent.futures.ThreadPoolExecutor() as inner_executor: 215 | future_to_model = { 216 | inner_executor.submit(client.embed_documents, chunks): model_name 217 | for model_name, client in config.embedding_clients.items() 218 | } 219 | for future in concurrent.futures.as_completed(future_to_model): 220 | model_name = future_to_model[future] 221 | try: 222 | embeddings[model_name] = future.result() 223 | print_log(f"[INFO] Generated {len(chunks)} embeddings with {model_name}.") 224 | except Exception as e: 225 | print_log(f"[ERROR] Embedding with {model_name} failed: {e}") 226 | embeddings[model_name] = None 227 | return embeddings 228 | 229 | def _update_chunk_in_db(chunk_id: str, new_content: str) -> Dict[str, Any]: 230 | """Update chunk text and re-embed with all available models.""" 231 | oid = ObjectId(chunk_id) 232 | update_payload = {"$set": {"text": new_content}} 233 | 234 | print_log(f"[INFO] Re-embedding chunk {chunk_id} with all available models...") 235 | all_embeddings = _embed_chunks_parallel([new_content]) 236 | for model_name, embeddings_list in all_embeddings.items(): 237 | if embeddings_list: 238 | vector_field = EMBEDDING_CONFIG[model_name]["vector_field"] 239 | update_payload["$set"][vector_field] = embeddings_list[0] 240 | 241 | result = config.collection.update_one({"_id": oid}, update_payload) 242 | if result.matched_count == 0: 243 | raise ValueError(f"Could not find chunk with ID '{chunk_id}'.") 244 | 245 | return {"status": "success", "message": f"Chunk '{chunk_id}' updated (re-embedded)."} 246 | 247 | def _delete_chunk_from_db(chunk_id: str) -> Dict[str, Any]: 248 | """Delete a single chunk by ID.""" 249 | result = config.collection.delete_one({"_id": ObjectId(chunk_id)}) 250 | if result.deleted_count == 0: 251 | raise ValueError(f"Could not find chunk '{chunk_id}' to delete.") 252 | return {"status": "success", "message": f"Chunk '{chunk_id}' deleted."} 253 | 254 | def _perform_vector_search( 255 | query: str, 256 | session_id: str, 257 | embedding_model: str, 258 | num_sources: int 259 | ) -> List[Dict]: 260 | """Perform a vector-based search in MongoDB for top `num_sources` results.""" 261 | if embedding_model not in config.embedding_clients: 262 | raise ValueError(f"Embedding model '{embedding_model}' is not available.") 263 | 264 | model_config = EMBEDDING_CONFIG[embedding_model] 265 | embedding_client = config.embedding_clients[embedding_model] 266 | query_vector = embedding_client.embed_query(query) 267 | 268 | pipeline = [ 269 | { 270 | "$vectorSearch": { 271 | "index": model_config['index_name'], 272 | "path": model_config['vector_field'], 273 | "queryVector": query_vector, 274 | "numCandidates": num_sources * 10, 275 | "limit": num_sources, 276 | "filter": { 277 | f"metadata.{SESSION_FIELD}": {"$eq": session_id} 278 | } 279 | } 280 | }, 281 | { 282 | "$project": { 283 | "_id": 0, 284 | "content": "$text", 285 | "source": "$metadata.source", 286 | "score": {"$meta": "vectorSearchScore"} 287 | } 288 | } 289 | ] 290 | 291 | return list(config.collection.aggregate(pipeline)) 292 | 293 | 294 | # --- 6. 
AGENT TOOLS --- 295 | @tool 296 | def search_knowledge_base(query: str, embedding_model: str, num_sources: int = 3, max_chunk_length: int = 2000) -> str: 297 | """Query the knowledge base to find relevant chunks for `query`.""" 298 | try: 299 | print_log(f"[INFO] Searching with '{embedding_model}' → top {num_sources}") 300 | results_with_scores = _perform_vector_search(query, config.current_session, embedding_model, num_sources) 301 | 302 | if not results_with_scores: 303 | config.last_retrieved_sources = [] 304 | return f"No relevant info found in session '{config.current_session}'." 305 | 306 | # Remember sources 307 | found_sources = [r.get("source", "N/A") for r in results_with_scores] 308 | config.last_retrieved_sources = list(set(found_sources)) 309 | 310 | # Build a context string 311 | context_parts = [] 312 | for r in results_with_scores: 313 | text = r.get("content", "") 314 | src = r.get("source", "N/A") 315 | score = r.get("score", 0.0) 316 | if max_chunk_length and len(text) > max_chunk_length: 317 | text = text[:max_chunk_length] + "... [truncated]" 318 | context_parts.append(f"Source: {src} (Score: {score:.4f})\nContent: {text}") 319 | 320 | context = "\n---\n".join(context_parts) 321 | return f"Retrieved from '{embedding_model}':\n{context}" 322 | 323 | except Exception as e: 324 | config.last_retrieved_sources = [] 325 | print_log(f"[ERROR] search_knowledge_base: {e}") 326 | return f"❌ Search error: {e}" 327 | 328 | @tool 329 | def read_url(url: str, chunk_size: int=1000, chunk_overlap: int=150) -> str: 330 | """Adds a URL's content (via r.jina.ai) into the knowledge base.""" 331 | try: 332 | if config.collection.find_one({"metadata.source": url, f"metadata.{SESSION_FIELD}": config.current_session}): 333 | return f"❌ Source '{url}' already exists in session '{config.current_session}'." 334 | 335 | jina_key = os.getenv("JINA_API_KEY") 336 | if not jina_key: 337 | return "❌ JINA_API_KEY not set." 338 | 339 | headers = {"Authorization": f"Bearer {jina_key}", "Accept": "application/json"} 340 | print_log(f"[INFO] Reading & ingesting URL: {url}") 341 | 342 | resp = requests.get(f"https://r.jina.ai/{url}", headers=headers, timeout=30) 343 | resp.raise_for_status() 344 | page_content = resp.json().get("data", {}).get("content", "") 345 | if not page_content: 346 | return f"❌ No meaningful content from {url}." 347 | 348 | splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 349 | chunks = splitter.split_text(page_content) 350 | if not chunks: 351 | return "❌ Could not split content into chunks." 352 | 353 | all_embeddings = _embed_chunks_parallel(chunks) 354 | docs_to_insert = [] 355 | for i, ctext in enumerate(chunks): 356 | doc = { 357 | "text": ctext, 358 | "metadata": { 359 | "source": url, 360 | "source_type": "url", 361 | SESSION_FIELD: config.current_session, 362 | "chunk_index": i 363 | } 364 | } 365 | for model_name, embed_list in all_embeddings.items(): 366 | if embed_list: 367 | vector_field = EMBEDDING_CONFIG[model_name]["vector_field"] 368 | doc[vector_field] = embed_list[i] 369 | docs_to_insert.append(doc) 370 | 371 | config.collection.insert_many(docs_to_insert) 372 | return f"✅ Ingested {len(chunks)} chunks from {url} into '{config.current_session}'." 
373 | 
374 |     except Exception as e:
375 |         print_log(f"[ERROR] read_url: {e}\n{traceback.format_exc()}")
376 |         return f"❌ Ingestion error: {e}"
377 | 
378 | @tool
379 | def update_chunk(chunk_id: str, new_content: str) -> str:
380 |     """Updates chunk text (and embeddings) by chunk ID."""
381 |     try:
382 |         res = _update_chunk_in_db(chunk_id, new_content)
383 |         return f"✅ {res['message']}"
384 |     except Exception as e:
385 |         return f"❌ Failed to update chunk: {e}"
386 | 
387 | @tool
388 | def delete_chunk(chunk_id: str) -> str:
389 |     """Deletes a chunk from the knowledge base by ID."""
390 |     try:
391 |         res = _delete_chunk_from_db(chunk_id)
392 |         return f"✅ {res['message']}"
393 |     except Exception as e:
394 |         return f"❌ Failed to delete chunk: {e}"
395 | 
396 | @tool
397 | def switch_session(session_id: str) -> str:
398 |     """Switch to another session in memory."""
399 |     config.current_session = session_id
400 |     if session_id not in config.chat_history:
401 |         config.chat_history[session_id] = []
402 |     return f"✅ Switched to session: **{session_id}**."
403 | 
404 | @tool
405 | def create_session(session_id: str) -> str:
406 |     """Create a new session in memory only (no marker doc)."""
407 |     existing_sessions = config.collection.distinct(f"metadata.{SESSION_FIELD}")
408 |     if session_id in existing_sessions:
409 |         return f"❌ Session **'{session_id}'** already exists."
410 | 
411 |     config.current_session = session_id
412 |     if session_id not in config.chat_history:
413 |         config.chat_history[session_id] = []
414 |     return f"✅ Created and switched to new session: **{session_id}**."
415 | 
416 | @tool
417 | def list_sources() -> str:
418 |     """List all sources in the current session."""
419 |     sources = config.collection.distinct("metadata.source", {f"metadata.{SESSION_FIELD}": config.current_session})
420 |     if not sources:
421 |         return f"No sources found in session '{config.current_session}'."
422 |     return "Sources:\n" + "\n".join(f"- {s}" for s in sources)
423 | 
424 | @tool
425 | def remove_all_sources() -> str:
426 |     """Remove all docs from the current session."""
427 |     r = config.collection.delete_many({f"metadata.{SESSION_FIELD}": config.current_session})
428 |     return f"🗑 Removed all docs from session '{config.current_session}' (deleted {r.deleted_count})."
429 | 
430 | # --- 7. AGENT PROMPT + EXECUTOR ---
431 | tools = [
432 |     search_knowledge_base,
433 |     switch_session,
434 |     create_session,
435 |     list_sources,
436 |     remove_all_sources,
437 |     update_chunk,
438 |     delete_chunk,
439 |     read_url
440 | ]
441 | 
442 | available_model_keys = list(config.embedding_clients.keys())
443 | AGENT_SYSTEM_PROMPT = (
444 |     "You are an AI assistant designed to answer questions using a private knowledge base. "
445 |     "Your primary directive is to **ALWAYS use the `search_knowledge_base` tool** to find relevant information before answering any user query. "
446 |     "**Do not answer from your general knowledge.** Your answers must be based *only* on the context provided by the `search_knowledge_base` tool. "
447 |     "If the tool returns no relevant information or the context is insufficient, you MUST state that you could not find an answer in the knowledge base. "
448 |     f"The available `embedding_model` options for the search tool are: {', '.join(available_model_keys)}. "
449 |     "For other tasks like managing sessions or sources, use the appropriate tool."
450 | )
451 | 
452 | 
453 | prompt = ChatPromptTemplate.from_messages([
454 |     ("system", AGENT_SYSTEM_PROMPT),
455 |     MessagesPlaceholder(variable_name="chat_history"),
456 |     ("human", "{input}"),
457 |     MessagesPlaceholder(variable_name="agent_scratchpad")
458 | ])
459 | 
460 | agent = create_openai_tools_agent(llm, tools, prompt)
461 | agent_executor = AgentExecutor(
462 |     agent=agent,
463 |     tools=tools,
464 |     verbose=True,
465 |     return_intermediate_steps=True
466 | )
467 | 
468 | 
469 | # --- 8. FLASK APP ---
470 | app = Flask(__name__, template_folder="templates", static_folder="static")
471 | CORS(app)
472 | 
473 | @app.route("/")
474 | def index():
475 |     return render_template("index.html")
476 | 
477 | # ---- Ingestion Endpoints ----
478 | @app.route("/ingest", methods=["POST"])
479 | def start_ingestion_task():
480 |     data = request.json
481 |     content = data.get("content")
482 |     source = data.get("source")
483 |     source_type = data.get("source_type", "unknown")
484 |     session_id = data.get("session_id")
485 |     chunk_size = data.get("chunk_size", 1000)
486 |     chunk_overlap = data.get("chunk_overlap", 150)
487 | 
488 |     if not all([content, source, session_id]):
489 |         return jsonify({"error": "Missing required fields."}), 400
490 | 
491 |     # Check duplicates
492 |     if config.collection.count_documents(
493 |         {"metadata.source": source, f"metadata.{SESSION_FIELD}": session_id},
494 |         limit=1
495 |     ) > 0:
496 |         return jsonify({"error": f"Source '{source}' already exists in session '{session_id}'."}), 409
497 | 
498 |     task_id = str(uuid.uuid4())
499 |     tasks[task_id] = {"status": "pending"}
500 | 
501 |     executor.submit(
502 |         run_ingestion_task,
503 |         task_id,
504 |         content,
505 |         source,
506 |         source_type,
507 |         session_id,
508 |         chunk_size,
509 |         chunk_overlap
510 |     )
511 | 
512 |     return jsonify({"task_id": task_id}), 202
513 | 
514 | @app.route("/ingest/status/<task_id>", methods=["GET"])
515 | def get_ingestion_status(task_id):
516 |     if task_id not in tasks:
517 |         return jsonify({"status": "not_found"}), 200
518 |     return jsonify(tasks[task_id]), 200
519 | 
520 | # ---- Chat Endpoint ----
521 | @app.route("/chat", methods=["POST"])
522 | def chat():
523 |     data = request.json
524 |     user_input = data.get("query")
525 |     session_id = data.get("session_id")
526 |     embedding_model = data.get("embedding_model", "openai")
527 |     rag_params = data.get("rag_params", {})
528 |     num_sources = rag_params.get("num_sources", config.rag_config["num_sources"])
529 |     max_chunk_length = rag_params.get("max_chunk_length", config.rag_config["max_chunk_length"])
530 | 
531 |     if not user_input or not session_id:
532 |         return jsonify({"error": "Missing 'query' or 'session_id'"}), 400
533 | 
534 |     print_log(f"\n--- Turn for session '{session_id}' ---\n")
535 |     original_session = config.current_session
536 | 
537 |     try:
538 |         # Switch session in memory
539 |         config.current_session = session_id
540 | 
541 |         # Initialize chat in memory if needed
542 |         if session_id not in config.chat_history:
543 |             config.chat_history[session_id] = []
544 | 
545 |         # Shorten chat history if too long
546 |         current_chat_history = config.chat_history[session_id]
547 |         if len(current_chat_history) > 10:
548 |             current_chat_history = current_chat_history[-10:]
549 | 
550 |         agent_input_string = (
551 |             f"User query: '{user_input}'.\n\n"
552 |             f"IMPORTANT INSTRUCTION: When you call the 'search_knowledge_base' tool, "
553 |             f"you MUST set the 'embedding_model' parameter to '{embedding_model}'."
554 | ) 555 | 556 | # Agent call 557 | response = agent_executor.invoke({ 558 | "input": agent_input_string, 559 | "chat_history": current_chat_history, 560 | "num_sources": num_sources, 561 | "max_chunk_length": max_chunk_length 562 | }) 563 | 564 | # Record the conversation 565 | current_chat_history.extend([ 566 | HumanMessage(content=user_input), 567 | AIMessage(content=response["output"]) 568 | ]) 569 | config.chat_history[session_id] = current_chat_history 570 | 571 | sources_used = config.last_retrieved_sources 572 | 573 | messages = [{ 574 | "type": "bot-message", 575 | "content": response["output"], 576 | "sources": sources_used 577 | }] 578 | 579 | db_sessions = set(config.collection.distinct(f"metadata.{SESSION_FIELD}") or ["default"]) 580 | mem_sessions = set(config.chat_history.keys()) 581 | all_sessions = db_sessions.union(mem_sessions) 582 | 583 | resp_data = { 584 | "messages": messages, 585 | "session_update": { 586 | "all_sessions": sorted(list(all_sessions)), 587 | "current_session": config.current_session 588 | } 589 | } 590 | return jsonify(resp_data) 591 | 592 | except Exception as e: 593 | print_log(f"[ERROR] chat endpoint: {e}\n{traceback.format_exc()}") 594 | return jsonify({"error": str(e)}), 500 595 | 596 | finally: 597 | pass 598 | 599 | # ---- Session / State Endpoints ---- 600 | @app.route("/state", methods=["GET"]) 601 | def get_state(): 602 | db_sessions = set(config.collection.distinct(f"metadata.{SESSION_FIELD}") or ["default"]) 603 | mem_sessions = set(config.chat_history.keys()) or {"default"} 604 | all_sessions = db_sessions.union(mem_sessions) 605 | 606 | return jsonify({ 607 | "all_sessions": sorted(list(all_sessions)), 608 | "current_session": config.current_session, 609 | "available_embedding_models": list(config.embedding_clients.keys()) 610 | }) 611 | 612 | @app.route("/history/clear", methods=["POST"]) 613 | def clear_history(): 614 | data = request.json 615 | session_id = data.get("session_id") 616 | if not session_id: 617 | return jsonify({"error": "Missing 'session_id'"}), 400 618 | 619 | if session_id in config.chat_history: 620 | config.chat_history[session_id] = [] 621 | msg = f"Chat history for '{session_id}' cleared." 
622 |         print_log("[INFO] " + msg)
623 |         return jsonify({"status": "success", "message": msg})
624 | 
625 |     return jsonify({"status": "not_found", "message": f"Session '{session_id}' not found."}), 404
626 | 
627 | # ---- Searching / Preview Endpoints ----
628 | @app.route("/preview_search", methods=["POST"])
629 | def preview_search():
630 |     data = request.json
631 |     query = data.get("query")
632 |     session_id = data.get("session_id")
633 |     embedding_model = data.get("embedding_model", "openai")
634 |     num_sources = data.get("num_sources", 3)
635 | 
636 |     if not query or not session_id:
637 |         return jsonify({"error": "Missing required fields"}), 400
638 | 
639 |     try:
640 |         results = _perform_vector_search(query, session_id, embedding_model, num_sources)
641 |         return jsonify(results)
642 |     except Exception as e:
643 |         print_log(f"[ERROR] preview_search: {e}")
644 |         return jsonify({"error": str(e)}), 500
645 | 
646 | @app.route("/preview_file", methods=["POST"])
647 | def preview_file():
648 |     if "file" not in request.files:
649 |         return jsonify({"error": "No file part"}), 400
650 | 
651 |     file = request.files["file"]
652 |     if file.filename == "":
653 |         return jsonify({"error": "No selected file"}), 400
654 | 
655 |     _, extension = os.path.splitext(file.filename.lower())
656 |     MAX_PREVIEW = 50000
657 | 
658 |     if extension in [".txt", ".md"]:
659 |         text_data = file.read().decode("utf-8", errors="replace")
660 |         if len(text_data) > MAX_PREVIEW:
661 |             text_data = text_data[:MAX_PREVIEW] + "\n\n[TRUNCATED]"
662 |         return jsonify({"content": text_data, "filename": file.filename})
663 | 
664 |     temp_file_path = ""
665 |     try:
666 |         with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp:
667 |             file.save(tmp.name)
668 |             temp_file_path = tmp.name
669 | 
670 |         converter = DocumentConverter()
671 |         result = converter.convert(temp_file_path)
672 |         doc_text = result.document.export_to_markdown()
673 |         if len(doc_text) > MAX_PREVIEW:
674 |             doc_text = doc_text[:MAX_PREVIEW] + "\n\n[TRUNCATED]"
675 | 
676 |         return jsonify({
677 |             "content": doc_text,
678 |             "filename": file.filename
679 |         })
680 |     finally:
681 |         if temp_file_path and os.path.exists(temp_file_path):
682 |             os.unlink(temp_file_path)
683 | 
684 | @app.route("/preview_url", methods=["GET"])
685 | def preview_url():
686 |     url = request.args.get("url")
687 |     if not url:
688 |         return jsonify({"error": "URL parameter is required."}), 400
689 | 
690 |     jina_key = os.getenv("JINA_API_KEY")
691 |     if not jina_key:
692 |         return jsonify({"error": "JINA_API_KEY not set."}), 500
693 | 
694 |     headers = {"Authorization": f"Bearer {jina_key}", "Accept": "application/json"}
695 |     try:
696 |         print_log(f"[INFO] Previewing URL: {url}")
697 |         resp = requests.get(f"https://r.jina.ai/{url}", headers=headers, timeout=30)
698 |         resp.raise_for_status()
699 |         page_content = resp.json().get("data", {}).get("content", "")
700 |         MAX_PREVIEW = 50000
701 |         if len(page_content) > MAX_PREVIEW:
702 |             page_content = page_content[:MAX_PREVIEW] + "\n\n[TRUNCATED]"
703 |         return jsonify({"markdown": page_content})
704 |     except requests.exceptions.RequestException as e:
705 |         return jsonify({"error": f"Error fetching URL content: {e}"}), 500
706 |     except Exception as e:
707 |         return jsonify({"error": f"Unexpected error: {e}"}), 500
708 | 
709 | # ---- Chunk Editing ----
710 | @app.route("/chunk/<chunk_id>", methods=["DELETE"])
711 | def api_delete_chunk(chunk_id):
712 |     try:
713 |         return jsonify(_delete_chunk_from_db(chunk_id))
714 |     except Exception as e:
715 |         return jsonify({"error": str(e)}), 500
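# Example client calls for the two chunk-editing endpoints (a rough sketch, assuming the
# default local server started in __main__ below at http://127.0.0.1:5001 and a chunk_id
# taken from the /chunks listing endpoint further down):
#
#   import requests
#   BASE = "http://127.0.0.1:5001"
#   requests.put(f"{BASE}/chunk/{chunk_id}", json={"content": "corrected chunk text"})  # re-embeds the chunk
#   requests.delete(f"{BASE}/chunk/{chunk_id}")                                         # removes the chunk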
716 | 
717 | @app.route("/chunk/<chunk_id>", methods=["PUT"])
718 | def api_update_chunk(chunk_id):
719 |     new_content = request.json.get("content")
720 |     if not new_content:
721 |         return jsonify({"error": "New content is required"}), 400
722 | 
723 |     try:
724 |         return jsonify(_update_chunk_in_db(chunk_id, new_content))
725 |     except Exception as e:
726 |         return jsonify({"error": str(e)}), 500
727 | 
728 | # ---- Source Browsing ----
729 | @app.route("/sources", methods=["GET"])
730 | def get_sources():
731 |     session_id = request.args.get("session_id", "default")
732 |     pipeline = [
733 |         {"$match": {f"metadata.{SESSION_FIELD}": session_id}},
734 |         {
735 |             "$group": {
736 |                 "_id": "$metadata.source",
737 |                 "source_type": {"$first": "$metadata.source_type"},
738 |                 "chunk_count": {"$sum": 1}
739 |             }
740 |         },
741 |         {
742 |             "$project": {
743 |                 "name": "$_id",
744 |                 "type": {"$ifNull": ["$source_type", "unknown"]},
745 |                 "chunk_count": "$chunk_count",
746 |                 "_id": 0
747 |             }
748 |         },
749 |         {"$sort": {"name": 1}}
750 |     ]
751 |     return jsonify(list(config.collection.aggregate(pipeline)))
752 | 
753 | @app.route("/chunks", methods=["GET"])
754 | def get_chunks():
755 |     session_id = request.args.get("session_id", "default")
756 |     source_url = request.args.get("source_url")
757 |     if not source_url:
758 |         return jsonify({"error": "source_url required"}), 400
759 | 
760 |     cursor = config.collection.find(
761 |         {"metadata.source": source_url, f"metadata.{SESSION_FIELD}": session_id},
762 |         {"_id": 1, "text": 1}
763 |     )
764 |     return jsonify([
765 |         {"_id": str(doc["_id"]), "text": doc["text"]}
766 |         for doc in cursor
767 |     ])
768 | 
769 | # --- MODIFIED: Endpoint now returns a readable HTML page ---
770 | @app.route("/source_content", methods=["GET"])
771 | def get_source_content():
772 |     session_id = request.args.get("session_id")
773 |     source = request.args.get("source")
774 | 
775 |     if not session_id or not source:
776 |         return "

Error

Missing 'session_id' or 'source' parameter.

", 400 777 | 778 | try: 779 | chunks_cursor = config.collection.find( 780 | { 781 | f"metadata.{SESSION_FIELD}": session_id, 782 | "metadata.source": source 783 | }, 784 | {"text": 1, "_id": 0} 785 | ).sort("metadata.chunk_index", pymongo.ASCENDING) 786 | 787 | full_content = "".join([chunk.get('text', '') for chunk in chunks_cursor]) 788 | 789 | if not full_content: 790 | return "

Error

Source not found or has no content.

", 404 791 | 792 | # Return a simple, styled HTML page instead of JSON 793 | html_template = """ 794 | 795 | 796 | 797 | 798 | {{ source_name }} 799 | 825 | 826 | 827 |
828 |

Source

829 |

830 | {{ source_name }} 831 |

832 |
833 |
{{ content }}
834 |
835 | 836 | 837 | """ 838 | return render_template_string(html_template, source_name=source, content=full_content) 839 | 840 | except Exception as e: 841 | print_log(f"[ERROR] /source_content: {e}\n{traceback.format_exc()}") 842 | return f"

Error

An unexpected error occurred: {str(e)}

", 500 843 | 844 | 845 | # --- 9. DB INDEX SETUP & LAUNCH --- 846 | def setup_database_and_index(): 847 | print_log("--- 🚀 Initializing DB and Vector Search Indexes ---") 848 | if COLLECTION_NAME not in config.db.list_collection_names(): 849 | config.db.create_collection(COLLECTION_NAME) 850 | 851 | for model_name, model_cfg in EMBEDDING_CONFIG.items(): 852 | if model_name not in config.embedding_clients: 853 | print_log(f"[WARN] Model '{model_name}' is not loaded, skipping index creation.") 854 | continue 855 | 856 | index_name = model_cfg["index_name"] 857 | vector_field = model_cfg["vector_field"] 858 | dims = model_cfg["dimensions"] 859 | 860 | definition = { 861 | "fields": [ 862 | { 863 | "type": "vector", 864 | "path": vector_field, 865 | "numDimensions": dims, 866 | "similarity": "cosine" 867 | }, 868 | { 869 | "type": "filter", 870 | "path": f"metadata.{SESSION_FIELD}" 871 | } 872 | ] 873 | } 874 | 875 | try: 876 | existing = next(config.collection.list_search_indexes(name=index_name), None) 877 | if not existing: 878 | print_log(f"[ACTION] Creating index '{index_name}' for model '{model_name}'...") 879 | config.collection.create_search_index( 880 | model=SearchIndexModel(name=index_name, type="vectorSearch", definition=definition) 881 | ) 882 | print_log(f"[INFO] Finished creating index '{index_name}'.") 883 | else: 884 | print_log(f"[INFO] Index '{index_name}' already exists.") 885 | except OperationFailure as e: 886 | if "already exists" in str(e).lower(): 887 | print_log(f"[INFO] Index '{index_name}' already exists. OK.") 888 | else: 889 | print_log(f"[ERROR] Creating index '{index_name}' failed: {e}") 890 | raise 891 | 892 | @app.route("/search", methods=["POST"]) 893 | def search_web(): 894 | data = request.json 895 | query = data.get("query") 896 | num_results = data.get("num_results", 5) 897 | if not query: 898 | return jsonify({"error": "Query is required"}), 400 899 | try: 900 | print_log(f"[INFO] Web search for: '{query}'") 901 | with DDGS() as ddgs: 902 | results = [r for r in ddgs.text(query, max_results=num_results)] 903 | return jsonify({"status": "success", "results": results}) 904 | except Exception as e: 905 | print_log(f"[ERROR] Web search failed: {e}\n{traceback.format_exc()}") 906 | return jsonify({"error": f"Web search error: {str(e)}"}), 500 907 | 908 | @app.route("/chunk_preview", methods=["POST"]) 909 | def chunk_preview(): 910 | data = request.json 911 | content = data.get("content") 912 | chunk_size = data.get("chunk_size", 1000) 913 | chunk_overlap = data.get("chunk_overlap", 150) 914 | 915 | if not content: 916 | return jsonify({"error": "Content is required"}), 400 917 | 918 | if chunk_overlap >= chunk_size: 919 | return jsonify({"error": "Chunk overlap must be smaller than chunk size."}), 400 920 | 921 | try: 922 | text_splitter = RecursiveCharacterTextSplitter( 923 | chunk_size=chunk_size, 924 | chunk_overlap=chunk_overlap 925 | ) 926 | chunks = text_splitter.split_text(content) 927 | return jsonify({"chunks": chunks}) 928 | except Exception as e: 929 | print_log(f"[ERROR] Chunk preview failed: {e}\n{traceback.format_exc()}") 930 | return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500 931 | 932 | if __name__ == "__main__": 933 | setup_database_and_index() 934 | print_log("--- ✅ Setup complete. 
Starting server at http://127.0.0.1:5001 ---") 935 | app.run(debug=True, port=5001) -------------------------------------------------------------------------------- /rag/static/script.js: -------------------------------------------------------------------------------- 1 | // --------------------------- 2 | // Global state and references 3 | // --------------------------- 4 | let currentSessionId = "default"; 5 | let allSessions = []; 6 | let availableModels = []; 7 | let chunkCache = new Map(); 8 | 9 | // Dom references 10 | const chatBox = document.getElementById("chat-box"); 11 | const userInput = document.getElementById("user-input"); 12 | const chatForm = document.getElementById("chat-form"); 13 | const sessionSelector = document.getElementById("session-selector"); 14 | const newSessionBtn = document.getElementById("new-session-btn"); 15 | const clearHistoryBtn = document.getElementById("clear-history-btn"); 16 | const toolButtonsContainer = document.getElementById("tool-buttons"); 17 | const thinkingIndicator = document.getElementById("thinking-indicator"); 18 | 19 | const embeddingModelSelector = document.getElementById("embedding-model-selector"); 20 | const numSourcesInput = document.getElementById("num-sources-input"); 21 | const minScoreInput = document.getElementById("min-score-input"); 22 | const minScoreValue = document.getElementById("min-score-value"); 23 | const maxCharsInput = document.getElementById("max-chunk-length-input"); 24 | const maxCharsValue = document.getElementById("max-chars-value"); 25 | 26 | const previewRagBtn = document.getElementById("preview-rag-btn"); 27 | 28 | // Modal references 29 | const modalOverlay = document.getElementById("modal-overlay"); 30 | const modalContainer = document.getElementById("modal-container"); 31 | const modalTitle = document.getElementById("modal-title"); 32 | const modalText = document.getElementById("modal-text"); 33 | const modalContentHost = document.getElementById("modal-content-host"); 34 | const modalCancelBtn = document.getElementById("modal-btn-cancel"); 35 | const modalSubmitBtn = document.getElementById("modal-btn-submit"); 36 | 37 | // Source browser references 38 | const sourceBrowserOverlay = document.getElementById("source-browser-overlay"); 39 | const sourceBrowserContainer = document.getElementById("source-browser-container"); 40 | const sourceBrowserCloseBtn = document.getElementById("source-browser-close-btn"); 41 | const sourceListEl = document.getElementById("source-list"); 42 | const chunkListEl = document.getElementById("chunk-list"); 43 | const chunkListPlaceholder = document.getElementById("chunk-list-placeholder"); 44 | const sourceBrowserTotalChunks = document.getElementById("source-browser-total-chunks"); 45 | const sourceBrowserSourceCount = document.getElementById("source-browser-source-count"); 46 | const sourceBrowserSelectedChunkCount = document.getElementById("source-browser-selected-chunk-count"); 47 | 48 | // ----------- 49 | // Modal Logic 50 | // ----------- 51 | function showModal({ title, text, contentHTML, onSubmit, onCancel }) { 52 | modalTitle.textContent = title || "Modal Title"; 53 | modalText.textContent = text || ""; 54 | modalContentHost.innerHTML = contentHTML || ""; 55 | 56 | if (onSubmit) { 57 | modalSubmitBtn.onclick = () => { 58 | onSubmit(); 59 | }; 60 | } else { 61 | modalSubmitBtn.onclick = () => { 62 | hideModal(); 63 | }; 64 | } 65 | 66 | if (onCancel) { 67 | modalCancelBtn.onclick = () => { 68 | onCancel(); 69 | }; 70 | } else { 71 | modalCancelBtn.onclick = () => { 72 
| hideModal(); 73 | }; 74 | } 75 | 76 | modalOverlay.classList.remove("invisible", "opacity-0"); 77 | modalContainer.classList.remove("scale-95", "opacity-0"); 78 | } 79 | 80 | function hideModal() { 81 | modalOverlay.classList.add("opacity-0", "invisible"); 82 | modalContainer.classList.add("scale-95", "opacity-0"); 83 | 84 | modalTitle.textContent = ""; 85 | modalText.textContent = ""; 86 | modalContentHost.innerHTML = ""; 87 | modalSubmitBtn.onclick = null; 88 | modalCancelBtn.onclick = null; 89 | } 90 | 91 | modalOverlay.addEventListener("click", (e) => { 92 | if (e.target === modalOverlay) { 93 | hideModal(); 94 | } 95 | }); 96 | 97 | // ------------------------ 98 | // Source Browser Functions 99 | // ------------------------ 100 | function openSourceBrowser() { 101 | sourceBrowserOverlay.classList.remove("opacity-0", "invisible"); 102 | sourceBrowserContainer.classList.remove("scale-95", "opacity-0"); 103 | sourceBrowserTotalChunks.textContent = ""; 104 | sourceBrowserSourceCount.textContent = "Total: 0"; 105 | sourceBrowserSelectedChunkCount.textContent = "Selected: 0"; 106 | 107 | fetch(`/sources?session_id=${encodeURIComponent(currentSessionId)}`) 108 | .then((r) => r.json()) 109 | .then((data) => { 110 | sourceListEl.innerHTML = ""; 111 | chunkListEl.innerHTML = ""; 112 | chunkListPlaceholder.style.display = "block"; 113 | 114 | if (!data || data.length === 0) { 115 | sourceListEl.innerHTML = "

No sources found.

"; 116 | return; 117 | } 118 | 119 | sourceBrowserSourceCount.textContent = `Total: ${data.length}`; 120 | let totalChunks = 0; 121 | data.forEach(src => totalChunks += (src.chunk_count || 0)); 122 | sourceBrowserTotalChunks.textContent = `(${totalChunks.toLocaleString()} Total Chunks)`; 123 | 124 | data.forEach((src) => { 125 | const btn = document.createElement("button"); 126 | btn.className = "source-item"; 127 | 128 | const chunkCount = src.chunk_count !== undefined ? `${src.chunk_count}` : '?'; 129 | const sourceName = src.name + (src.type ? ` (${src.type})` : ""); 130 | 131 | btn.innerHTML = ` 132 | ${escapeHtml(sourceName)} 133 | ${chunkCount} 134 | `; 135 | 136 | btn.onclick = () => { 137 | document.querySelectorAll('.source-item').forEach(b => b.classList.remove('active')); 138 | btn.classList.add('active'); 139 | loadChunksForSource(src.name); 140 | }; 141 | sourceListEl.appendChild(btn); 142 | }); 143 | }) 144 | .catch((err) => { 145 | console.error("Failed to list sources:", err); 146 | sourceListEl.innerHTML = `

Error: ${err.message}

`; 147 | }); 148 | } 149 | 150 | function loadChunksForSource(sourceUrl) { 151 | sourceBrowserSelectedChunkCount.textContent = "Loading..."; 152 | fetch(`/chunks?session_id=${encodeURIComponent(currentSessionId)}&source_url=` + encodeURIComponent(sourceUrl)) 153 | .then((r) => r.json()) 154 | .then((data) => { 155 | chunkListEl.innerHTML = ""; 156 | chunkCache.clear(); 157 | 158 | if (data.error) { 159 | chunkListEl.innerHTML = `

Error: ${data.error}

`; 160 | sourceBrowserSelectedChunkCount.textContent = "Error"; 161 | return; 162 | } 163 | if (!data || data.length === 0) { 164 | chunkListEl.innerHTML = "

No chunks found for this source.

"; 165 | sourceBrowserSelectedChunkCount.textContent = "Selected: 0"; 166 | return; 167 | } 168 | 169 | sourceBrowserSelectedChunkCount.textContent = `Selected: ${data.length}`; 170 | 171 | chunkListPlaceholder.style.display = "none"; 172 | data.forEach((ch) => { 173 | chunkCache.set(ch._id, ch); 174 | const card = document.createElement("div"); 175 | card.className = "chunk-card"; 176 | card.setAttribute("data-chunk-id", ch._id); // Add ID for easier selection 177 | card.innerHTML = ` 178 |
179 |
Chunk ID: ${ch._id}
180 |
181 | 182 | 183 |
184 |
185 |
${marked.parse(ch.text || "")}
186 | `; 187 | chunkListEl.appendChild(card); 188 | }); 189 | }) 190 | .catch((err) => { 191 | console.error("Failed to load chunks:", err); 192 | chunkListEl.innerHTML = `

Error: ${err.message}

`; 193 | sourceBrowserSelectedChunkCount.textContent = "Error"; 194 | }); 195 | } 196 | 197 | // ----------------------------------------------------------- 198 | // CORRECTED: A single, robust event listener for all chunk buttons 199 | // (Handles Edit, Delete, Save, and Cancel) 200 | // ----------------------------------------------------------- 201 | chunkListEl.addEventListener('click', (event) => { 202 | const editButton = event.target.closest('.chunk-edit-btn'); 203 | if (editButton) { 204 | const chunkId = editButton.getAttribute('data-id'); 205 | startChunkEdit(chunkId); 206 | return; 207 | } 208 | 209 | const deleteButton = event.target.closest('.chunk-delete-btn'); 210 | if (deleteButton) { 211 | const chunkId = deleteButton.getAttribute('data-id'); 212 | onDeleteChunkClick(chunkId); // Uses existing delete logic 213 | return; 214 | } 215 | 216 | const saveButton = event.target.closest('.chunk-save-btn'); 217 | if (saveButton) { 218 | const chunkId = saveButton.getAttribute('data-id'); 219 | saveChunkEdit(chunkId); 220 | return; 221 | } 222 | 223 | const cancelButton = event.target.closest('.chunk-cancel-btn'); 224 | if (cancelButton) { 225 | const chunkId = cancelButton.getAttribute('data-id'); 226 | cancelChunkEdit(chunkId); 227 | return; 228 | } 229 | }); 230 | 231 | 232 | sourceBrowserCloseBtn.addEventListener("click", () => { 233 | closeSourceBrowser(); 234 | }); 235 | 236 | function closeSourceBrowser() { 237 | sourceBrowserOverlay.classList.add("opacity-0", "invisible"); 238 | sourceBrowserContainer.classList.add("scale-95", "opacity-0"); 239 | sourceListEl.innerHTML = ""; 240 | chunkListEl.innerHTML = ""; 241 | chunkListPlaceholder.style.display = "block"; 242 | sourceBrowserTotalChunks.textContent = ""; 243 | sourceBrowserSourceCount.textContent = "Total: 0"; 244 | sourceBrowserSelectedChunkCount.textContent = "Selected: 0"; 245 | } 246 | 247 | // ------------------------------------------------- 248 | // --- NEW IN-PLACE CHUNK EDITING LOGIC --- 249 | // ------------------------------------------------- 250 | 251 | function startChunkEdit(chunkId) { 252 | const chunkCard = chunkListEl.querySelector(`.chunk-card[data-chunk-id='${chunkId}']`); 253 | if (!chunkCard || chunkCard.classList.contains('is-editing')) return; 254 | 255 | const chunkData = chunkCache.get(chunkId); 256 | if (!chunkData) { 257 | alert("Error: Could not find chunk data to edit."); 258 | return; 259 | } 260 | 261 | chunkCard.classList.add('is-editing'); 262 | const contentHost = chunkCard.querySelector('.chunk-content'); 263 | const actionsHost = chunkCard.querySelector('.chunk-actions'); 264 | 265 | // Store original HTML for cancellation 266 | chunkCard.dataset.originalContent = contentHost.innerHTML; 267 | chunkCard.dataset.originalActions = actionsHost.innerHTML; 268 | 269 | // Inject the textarea and new buttons 270 | contentHost.innerHTML = ` 271 | 272 | `; 273 | actionsHost.innerHTML = ` 274 | 275 | 276 | `; 277 | 278 | // Auto-resize and focus the textarea 279 | const textarea = contentHost.querySelector('textarea'); 280 | const autoResize = () => { 281 | textarea.style.height = 'auto'; 282 | textarea.style.height = (textarea.scrollHeight) + 'px'; 283 | }; 284 | textarea.addEventListener('input', autoResize); 285 | autoResize(); 286 | textarea.focus(); 287 | } 288 | 289 | function cancelChunkEdit(chunkId) { 290 | const chunkCard = chunkListEl.querySelector(`.chunk-card[data-chunk-id='${chunkId}']`); 291 | if (!chunkCard || !chunkCard.classList.contains('is-editing')) return; 292 | 293 | const 
contentHost = chunkCard.querySelector('.chunk-content');
294 |     const actionsHost = chunkCard.querySelector('.chunk-actions');
295 | 
296 |     // Restore original content from dataset
297 |     contentHost.innerHTML = chunkCard.dataset.originalContent;
298 |     actionsHost.innerHTML = chunkCard.dataset.originalActions;
299 | 
300 |     chunkCard.classList.remove('is-editing');
301 |     delete chunkCard.dataset.originalContent;
302 |     delete chunkCard.dataset.originalActions;
303 | }
304 | 
305 | function saveChunkEdit(chunkId) {
306 |     const chunkCard = chunkListEl.querySelector(`.chunk-card[data-chunk-id='${chunkId}']`);
307 |     if (!chunkCard) return;
308 | 
309 |     const textarea = chunkCard.querySelector('.chunk-edit-textarea');
310 |     const newText = textarea.value;
311 |     const saveBtn = chunkCard.querySelector('.chunk-save-btn');
312 |     saveBtn.textContent = 'Saving...';
313 |     saveBtn.disabled = true;
314 | 
315 |     fetch("/chunk/" + encodeURIComponent(chunkId), {
316 |         method: "PUT",
317 |         headers: { "Content-Type": "application/json" },
318 |         body: JSON.stringify({ content: newText }),
319 |     })
320 |     .then(r => r.json())
321 |     .then(resp => {
322 |         if (resp.error) {
323 |             alert("Error updating chunk: " + resp.error);
324 |             saveBtn.textContent = 'Save';
325 |             saveBtn.disabled = false; // Re-enable on failure
326 |             return;
327 |         }
328 |         // Update local cache
329 |         const chunkData = chunkCache.get(chunkId);
330 |         chunkData.text = newText;
331 |         chunkCache.set(chunkId, chunkData);
332 | 
333 |         // Restore view mode with the *new* content
334 |         const contentHost = chunkCard.querySelector('.chunk-content');
335 |         const actionsHost = chunkCard.querySelector('.chunk-actions');
336 | 
337 |         contentHost.innerHTML = marked.parse(newText);
338 |         actionsHost.innerHTML = chunkCard.dataset.originalActions; // Restore original buttons
339 | 
340 |         chunkCard.classList.remove('is-editing');
341 |         delete chunkCard.dataset.originalContent;
342 |         delete chunkCard.dataset.originalActions;
343 |     })
344 |     .catch(err => {
345 |         alert("Error updating chunk: " + err.message);
346 |         saveBtn.textContent = 'Save';
347 |         saveBtn.disabled = false;
348 |     });
349 | }
350 | 
351 | function onDeleteChunkClick(chunkId) {
352 |     if (!confirm("Are you sure you want to delete this chunk?")) return;
353 |     fetch(`/chunk/${encodeURIComponent(chunkId)}`, { method: "DELETE" })
354 |     .then((r) => r.json())
355 |     .then((resp) => {
356 |         if (resp.error) {
357 |             alert("Error deleting chunk: " + resp.error);
358 |             return;
359 |         }
360 |         const chunkCard = chunkListEl.querySelector(`.chunk-card[data-chunk-id='${chunkId}']`);
361 |         if (chunkCard) {
362 |             chunkCard.remove();
363 |             chunkCache.delete(chunkId);
364 |         }
365 |     })
366 |     .catch((err) => {
367 |         alert("Error deleting chunk: " + err.message);
368 |     });
369 | }
370 | 
371 | // --------
372 | // Helpers
373 | // --------
374 | function escapeHtml(unsafe) {
375 |     if (!unsafe) return "";
376 |     return unsafe.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#039;");
377 | }
378 | 
379 | function escapeHtmlForTextarea(str) {
380 |     return str.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;");
381 | }
382 | 
383 | // ---------------------------------------------
384 | // Chat Rendering (add messages to the chat box)
385 | // ---------------------------------------------
386 | function addBotMessage(message) {
387 |     const content = message.content;
388 |     const sources = message.sources || [];
389 | 
390 |     const messageEl =
document.createElement("div"); 391 | messageEl.className = "message bot-message flex flex-col p-4 bg-gray-700 rounded-lg animate-fade-in-up"; 392 | 393 | const contentDiv = document.createElement("div"); 394 | contentDiv.className = "prose prose-invert max-w-none"; 395 | 396 | if (content.trim().startsWith(' 0) { 404 | let sourceLinksHTML = sources.map(source => { 405 | const href = `/source_content?session_id=${encodeURIComponent(currentSessionId)}&source=${encodeURIComponent(source)}`; 406 | const target = `target="_blank" rel="noopener noreferrer"`; 407 | 408 | let displayName = source; 409 | try { 410 | if (source.startsWith('http')) displayName = new URL(source).hostname; 411 | } catch (e) { /* use original source name */ } 412 | 413 | return ` 414 | 415 | 416 | 417 | 418 | 419 | ${escapeHtml(displayName)} 420 | 421 | `; 422 | }).join(''); 423 | 424 | const sourcesContainer = document.createElement("div"); 425 | sourcesContainer.className = "source-links mt-4 pt-4 border-t border-gray-600"; 426 | sourcesContainer.innerHTML = ` 427 |

Sources

428 |
429 | ${sourceLinksHTML} 430 |
431 | `; 432 | messageEl.appendChild(sourcesContainer); 433 | } 434 | 435 | chatBox.appendChild(messageEl); 436 | chatBox.scrollTop = chatBox.scrollHeight; 437 | } 438 | 439 | function addUserMessage(content) { 440 | const messageEl = document.createElement("div"); 441 | messageEl.className = "message user-message bg-gray-600 p-3 rounded-lg animate-fade-in-up text-right"; 442 | messageEl.textContent = content; 443 | chatBox.appendChild(messageEl); 444 | chatBox.scrollTop = chatBox.scrollHeight; 445 | } 446 | 447 | function addSystemMessage(content) { 448 | const div = document.createElement("div"); 449 | div.className = "message system-message bg-yellow-900/50 text-yellow-300 border-l-4 border-yellow-500 p-3 rounded-r-lg animate-fade-in-up"; 450 | div.innerHTML = `System: ${content}`; 451 | chatBox.appendChild(div); 452 | chatBox.scrollTop = chatBox.scrollHeight; 453 | } 454 | 455 | function setThinking(isThinking) { 456 | const indicator = document.getElementById("thinking-indicator"); 457 | if (isThinking) { 458 | indicator.classList.remove("invisible", "opacity-0"); 459 | chatBox.scrollTop = chatBox.scrollHeight; 460 | } else { 461 | indicator.classList.add("invisible", "opacity-0"); 462 | } 463 | } 464 | 465 | // ------------------------- 466 | // Session / State Functions 467 | // ------------------------- 468 | function loadSessionsAndState() { 469 | fetch("/state") 470 | .then((r) => r.json()) 471 | .then((data) => { 472 | allSessions = data.all_sessions || []; 473 | availableModels = data.available_embedding_models || []; 474 | currentSessionId = data.current_session || "default"; 475 | 476 | sessionSelector.innerHTML = ""; 477 | allSessions.forEach((s) => { 478 | const opt = document.createElement("option"); 479 | opt.value = s; 480 | opt.textContent = s; 481 | if (s === currentSessionId) { 482 | opt.selected = true; 483 | } 484 | sessionSelector.appendChild(opt); 485 | }); 486 | 487 | const selectedModel = embeddingModelSelector.value; 488 | embeddingModelSelector.innerHTML = ""; 489 | availableModels.forEach((m) => { 490 | const opt = document.createElement("option"); 491 | opt.value = m; 492 | opt.textContent = m; 493 | embeddingModelSelector.appendChild(opt); 494 | }); 495 | if (selectedModel && availableModels.includes(selectedModel)) { 496 | embeddingModelSelector.value = selectedModel; 497 | } 498 | }) 499 | .catch((err) => { 500 | console.error("Failed to load state:", err); 501 | }); 502 | } 503 | 504 | function switchSession(sessionId) { 505 | fetch("/chat", { 506 | method: "POST", 507 | headers: { "Content-Type": "application/json" }, 508 | body: JSON.stringify({ 509 | query: `switch_session ${sessionId}`, 510 | session_id: currentSessionId, 511 | }), 512 | }) 513 | .then((r) => r.json()) 514 | .then((data) => { 515 | if (data.error) { 516 | console.error("Error switching session:", data.error); 517 | } else { 518 | loadSessionsAndState(); 519 | chatBox.innerHTML = ''; 520 | const welcomeDiv = document.createElement("div"); 521 | welcomeDiv.className = "message system-message animate-fade-in-up bg-yellow-900/50 text-yellow-300 border-l-4 border-yellow-500 p-4 rounded-r-lg"; 522 | welcomeDiv.innerHTML = `Switched to session: ${sessionId}`; 523 | chatBox.appendChild(welcomeDiv); 524 | } 525 | }) 526 | .catch((err) => console.error("Failed to switch session:", err)); 527 | } 528 | 529 | function createSession(newSessionName) { 530 | fetch("/chat", { 531 | method: "POST", 532 | headers: { "Content-Type": "application/json" }, 533 | body: JSON.stringify({ 534 | query: 
`create_session ${newSessionName}`, 535 | session_id: currentSessionId, 536 | }), 537 | }) 538 | .then((r) => r.json()) 539 | .then((data) => { 540 | if (data.error) { 541 | console.error("Error creating session:", data.error); 542 | alert("Error creating session: " + data.error); 543 | } else { 544 | addSystemMessage(`Created and switched to new session: ${newSessionName}`); 545 | loadSessionsAndState(); 546 | } 547 | }) 548 | .catch((err) => console.error("Failed to create session:", err)); 549 | } 550 | 551 | // ------ 552 | // Events 553 | // ------ 554 | document.addEventListener("DOMContentLoaded", () => { 555 | loadSessionsAndState(); 556 | }); 557 | 558 | sessionSelector.addEventListener("change", () => { 559 | const sel = sessionSelector.value; 560 | if (sel !== currentSessionId) { 561 | switchSession(sel); 562 | } 563 | }); 564 | 565 | newSessionBtn.addEventListener("click", () => { 566 | const name = prompt("Enter new session name:"); 567 | if (name) { 568 | createSession(name.trim()); 569 | } 570 | }); 571 | 572 | clearHistoryBtn.addEventListener("click", () => { 573 | if (!confirm("Clear chat history for this session?")) return; 574 | fetch("/history/clear", { 575 | method: "POST", 576 | headers: { "Content-Type": "application/json" }, 577 | body: JSON.stringify({ session_id: currentSessionId }), 578 | }) 579 | .then((r) => r.json()) 580 | .then((data) => { 581 | if (data.error) { 582 | console.error("Error clearing history:", data.error); 583 | } else { 584 | chatBox.innerHTML = ""; 585 | const welcomeDiv = document.createElement("div"); 586 | welcomeDiv.className = "message system-message animate-fade-in-up bg-yellow-900/50 text-yellow-300 border-l-4 border-yellow-500 p-4 rounded-r-lg"; 587 | welcomeDiv.innerHTML = "Welcome! Use the Control Panel on the right to manage sessions, add data, and fine-tune retrieval settings."; 588 | chatBox.appendChild(welcomeDiv); 589 | } 590 | }) 591 | .catch((err) => console.error("Failed to clear history:", err)); 592 | }); 593 | 594 | chatForm.addEventListener("submit", (event) => { 595 | event.preventDefault(); 596 | const text = userInput.value.trim(); 597 | if (!text) return; 598 | 599 | addUserMessage(text); 600 | setThinking(true); 601 | 602 | const embeddingModel = embeddingModelSelector.value; 603 | const numSources = parseInt(numSourcesInput.value) || 3; 604 | const maxChunkLen = parseInt(maxCharsInput.value) || 2000; 605 | 606 | const payload = { 607 | query: text, 608 | session_id: currentSessionId, 609 | embedding_model: embeddingModel, 610 | rag_params: { 611 | num_sources: numSources, 612 | max_chunk_length: maxChunkLen, 613 | }, 614 | }; 615 | 616 | fetch("/chat", { 617 | method: "POST", 618 | headers: { "Content-Type": "application/json" }, 619 | body: JSON.stringify(payload), 620 | }) 621 | .then((r) => r.json()) 622 | .then((data) => { 623 | if (data.error) { 624 | addBotMessage({ content: `Error: ${data.error}` }); 625 | return; 626 | } 627 | const msgs = data.messages || []; 628 | msgs.forEach((m) => { 629 | if (m.type === "bot-message") { 630 | addBotMessage(m); 631 | } else if (m.type === "system-message") { 632 | addSystemMessage(m.content); 633 | } 634 | }); 635 | if (data.session_update) { 636 | loadSessionsAndState(); 637 | } 638 | }) 639 | .catch((err) => { 640 | addBotMessage({ content: `Error: ${err.message}` }); 641 | }) 642 | .finally(() => { 643 | setThinking(false); 644 | userInput.value = ""; 645 | userInput.focus(); 646 | userInput.style.height = 'auto'; 647 | }); 648 | }); 649 | 650 | 
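// For reference: the /chat round trip driven by the submit handler above (a sketch; the field
// names mirror the payload built there and the JSON returned by the Flask /chat endpoint):
//   request:  { query, session_id, embedding_model, rag_params: { num_sources, max_chunk_length } }
//   response: { messages: [{ type: "bot-message", content, sources }],
//               session_update: { all_sessions, current_session } }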
userInput.addEventListener('input', () => { 651 | userInput.style.height = 'auto'; 652 | userInput.style.height = (userInput.scrollHeight) + 'px'; 653 | }); 654 | 655 | userInput.addEventListener("keydown", (event) => { 656 | if (event.key === "Enter" && !event.shiftKey) { 657 | event.preventDefault(); 658 | chatForm.dispatchEvent(new Event('submit')); 659 | } 660 | }); 661 | 662 | toolButtonsContainer.addEventListener("click", (event) => { 663 | const btn = event.target.closest("button[data-action]"); 664 | if (!btn) return; 665 | const action = btn.getAttribute("data-action"); 666 | handleToolAction(action); 667 | }); 668 | 669 | function handleToolAction(action) { 670 | if (action === "read_url") { 671 | handleReadUrlAndChunking(); 672 | } else if (action === "read_file") { 673 | handleReadFile(); 674 | } else if (action === "browse_sources") { 675 | openSourceBrowser(); 676 | } else if (action === "search_web") { 677 | handleWebSearch(); 678 | } else if (action === "list_sources" || action === "remove_all") { 679 | const command = action === "list_sources" ? "list_sources" : "remove_all_sources"; 680 | if (action === "remove_all" && !confirm("Are you sure you want to remove all sources in this session?")) { 681 | return; 682 | } 683 | 684 | addUserMessage(command); 685 | setThinking(true); 686 | fetch("/chat", { 687 | method: "POST", 688 | headers: { "Content-Type": "application/json" }, 689 | body: JSON.stringify({ query: command, session_id: currentSessionId }), 690 | }) 691 | .then(r => r.json()) 692 | .then(data => { 693 | if (data.error) { 694 | addBotMessage({ content: `Error: ${data.error}` }); 695 | } else { 696 | (data.messages || []).forEach(m => { 697 | if (m.type === "bot-message" || m.type === "system-message") { 698 | addBotMessage(m); 699 | } 700 | }); 701 | } 702 | }) 703 | .catch(err => addBotMessage({ content: `Error: ${err.message}` })) 704 | .finally(() => setThinking(false)); 705 | } 706 | } 707 | 708 | // ------------------------------------ 709 | // --- NEW INGESTION MODAL LOGIC --- 710 | // ------------------------------------ 711 | 712 | async function renderChunkPreview(content, chunkSize, chunkOverlap, targetElementId, countElementId) { 713 | const targetEl = document.getElementById(targetElementId); 714 | const countEl = document.getElementById(countElementId); 715 | if (!targetEl || !countEl) return; 716 | 717 | targetEl.innerHTML = '
'; 718 | countEl.textContent = 'Total Chunks: ...'; 719 | 720 | if (chunkOverlap >= chunkSize) { 721 | targetEl.innerHTML = '

Error: Chunk overlap must be smaller than chunk size.

'; 722 | countEl.textContent = 'Total Chunks: 0'; 723 | return false; 724 | } 725 | 726 | try { 727 | const response = await fetch("/chunk_preview", { 728 | method: "POST", 729 | headers: { "Content-Type": "application/json" }, 730 | body: JSON.stringify({ content, chunk_size: chunkSize, chunk_overlap: chunkOverlap }), 731 | }); 732 | const data = await response.json(); 733 | 734 | if (data.error) { 735 | targetEl.innerHTML = `

Error chunking: ${escapeHtml(data.error)}

`; 736 | countEl.textContent = 'Total Chunks: 0'; 737 | return false; 738 | } 739 | 740 | if (!data.chunks || data.chunks.length === 0) { 741 | targetEl.innerHTML = '

Could not generate any chunks from the source content.

'; 742 | countEl.textContent = 'Total Chunks: 0'; 743 | return false; 744 | } 745 | 746 | const chunkHtml = data.chunks.map((c, i) => ` 747 |
748 |
Chunk ${i + 1}
749 |
${escapeHtml(c)}
750 |
751 | `).join(''); 752 | 753 | targetEl.innerHTML = `
${chunkHtml}
`; 754 | countEl.textContent = `Total Chunks: ${data.chunks.length}`; 755 | return true; 756 | } catch (err) { 757 | targetEl.innerHTML = `

Request error: ${escapeHtml(err.message)}

`; 758 | countEl.textContent = 'Total: 0'; 759 | return false; 760 | } 761 | } 762 | 763 | function handleReadFile() { 764 | let sourceName = ''; 765 | let currentFile = null; 766 | 767 | const modalHTML = ` 768 |
769 | 770 |
771 | 772 | 773 | 774 |

Drag & drop your file here

775 |

or click to browse

776 |
777 | 784 |
785 |
786 |
787 |

Source Content (Editable)

788 |
789 | 790 |
791 |
792 |
793 |
794 |

Chunk Preview

795 | Total: 0 796 |
797 |
798 |

Chunks will appear here.

799 |
800 |
801 |
802 |
803 |
804 | 805 | 806 |
807 |
    <!-- Remaining modal markup elided in this dump. -->
  `;

  showModal({
    title: "Add File to Knowledge Base",
    text: "Drop a file or click the area below, edit content if needed, adjust chunking, and submit to ingest.",
    contentHTML: modalHTML,
    onSubmit: () => {
      const content = document.getElementById('ingestion-source-content-textarea').value;
      if (!content || !sourceName) {
        alert('Please select and load a file first.');
        return;
      }
      const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
      const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);

      if (chunkOverlap >= chunkSize) {
        alert("Chunk overlap must be less than chunk size.");
        return;
      }

      fetch("/ingest", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          content: content,
          source: sourceName,
          source_type: "file",
          session_id: currentSessionId,
          chunk_size: chunkSize,
          chunk_overlap: chunkOverlap,
        }),
      }).then(r => r.json()).then(resp => {
        if (resp.error) {
          alert(`Error ingesting file: ${resp.error}`);
        } else if (resp.task_id) {
          hideModal();
          pollIngestionTask(resp.task_id);
        }
      }).catch(err => alert(`Error: ${err.message}`));
    }
  });

  const dropZone = document.getElementById('file-drop-zone');
  const fileInput = document.getElementById('ingestion-file-input');
  const dropZonePrompt = document.getElementById('file-drop-zone-prompt');
  const dropZoneDisplay = document.getElementById('file-drop-zone-display');
  const fileNameDisplay = document.getElementById('file-name-display');
  const contentTextarea = document.getElementById('ingestion-source-content-textarea');
  const rechunkBtn = document.getElementById('ingestion-rechunk-btn');

  const processFile = (file) => {
    if (!file) return;
    currentFile = file;

    fileNameDisplay.textContent = file.name;
    dropZonePrompt.classList.add('hidden');
    dropZoneDisplay.classList.remove('hidden');
    dropZoneDisplay.classList.add('flex');

    contentTextarea.value = 'Loading file content...';
    const formData = new FormData();
    formData.append('file', file);

    fetch('/preview_file', { method: 'POST', body: formData })
      .then(r => r.json()).then(data => {
        if (data.error) {
          contentTextarea.value = `Error: ${escapeHtml(data.error)}`;
          return;
        }
        sourceName = data.filename;
        contentTextarea.value = data.content;

        const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
        const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);
        renderChunkPreview(data.content, chunkSize, chunkOverlap, 'ingestion-chunk-preview-host', 'ingestion-chunk-count');
      }).catch(err => {
        contentTextarea.value = `Fetch error: ${escapeHtml(err.message)}`;
      });
  };

  dropZone.addEventListener('click', () => fileInput.click());
  dropZone.addEventListener('dragover', (e) => {
    e.preventDefault();
    dropZone.classList.add('drop-zone-dragover');
  });
  dropZone.addEventListener('dragleave', () => dropZone.classList.remove('drop-zone-dragover'));
  dropZone.addEventListener('drop', (e) => {
    e.preventDefault();
    dropZone.classList.remove('drop-zone-dragover');
    if (e.dataTransfer.files.length > 0) {
      fileInput.files = e.dataTransfer.files;
      processFile(e.dataTransfer.files[0]);
    }
  });
  fileInput.addEventListener('change', () => {
    if (fileInput.files.length > 0) {
      processFile(fileInput.files[0]);
    }
  });

  rechunkBtn.addEventListener('click', () => {
    const content = contentTextarea.value;
    if (!content) { alert('Load a file first.'); return; }
    const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
    const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);
    renderChunkPreview(content, chunkSize, chunkOverlap, 'ingestion-chunk-preview-host', 'ingestion-chunk-count');
    rechunkBtn.classList.remove('needs-update');
  });

  contentTextarea.addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
  document.getElementById('ingestion-chunk-size').addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
  document.getElementById('ingestion-chunk-overlap').addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
}
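// Opens the "Add URL" modal: fetch the page via /preview_url, let the user edit the
// extracted markdown and tune chunk size/overlap, preview the resulting chunks, and
// finally POST the edited content to /ingest. If initialUrl is provided (e.g. from a
// web-search result), its content is loaded immediately.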
function handleReadUrlAndChunking(initialUrl = '') {
  // NOTE: the original styled markup for this modal was elided in this dump. The markup
  // below is a minimal reconstruction from the element IDs, labels, and text the handlers
  // in this function depend on; layout, button labels, and default values are assumptions.
  const modalHTML = `
    <input type="text" id="ingestion-url-input" placeholder="https://..." value="${initialUrl}" />
    <button type="button" id="ingestion-load-url-btn">Load URL</button>
    <label for="ingestion-source-content-textarea">Source Content (Editable)</label>
    <textarea id="ingestion-source-content-textarea"></textarea>
    <label for="ingestion-chunk-size">Chunk Size</label>
    <input type="number" id="ingestion-chunk-size" value="1000" />
    <label for="ingestion-chunk-overlap">Chunk Overlap</label>
    <input type="number" id="ingestion-chunk-overlap" value="150" />
    <button type="button" id="ingestion-rechunk-btn">Re-chunk</button>
    <div>Chunk Preview <span id="ingestion-chunk-count">Total: 0</span></div>
    <div id="ingestion-chunk-preview-host">Chunks will appear here.</div>
  `;

  showModal({
    title: "Add URL to Knowledge Base",
    text: "Fetch content, edit if needed, adjust chunking, and submit to ingest.",
    contentHTML: modalHTML,
    onSubmit: () => {
      const url = document.getElementById('ingestion-url-input').value.trim();
      const content = document.getElementById('ingestion-source-content-textarea').value;
      if (!url || !content) {
        alert('Please load the URL content first.');
        return;
      }
      const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
      const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);

      if (chunkOverlap >= chunkSize) {
        alert("Chunk overlap must be less than chunk size.");
        return;
      }

      fetch("/ingest", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          content: content,
          source: url,
          source_type: "url",
          session_id: currentSessionId,
          chunk_size: chunkSize,
          chunk_overlap: chunkOverlap,
        }),
      }).then(r => r.json()).then(resp => {
        if (resp.error) {
          alert(`Error ingesting URL: ${resp.error}`);
        } else if (resp.task_id) {
          hideModal();
          pollIngestionTask(resp.task_id);
        }
      }).catch(err => alert(`Error: ${err.message}`));
    }
  });

  const urlInput = document.getElementById('ingestion-url-input');
  const loadBtn = document.getElementById('ingestion-load-url-btn');
  const contentTextarea = document.getElementById('ingestion-source-content-textarea');
  const rechunkBtn = document.getElementById('ingestion-rechunk-btn');

  const loadUrlContent = () => {
    const url = urlInput.value.trim();
    if (!url) return;
    contentTextarea.value = 'Loading URL content...';

    fetch(`/preview_url?url=${encodeURIComponent(url)}`)
      .then(r => r.json()).then(data => {
        if (data.error) {
          contentTextarea.value = `Error: ${escapeHtml(data.error)}`;
          return;
        }
        contentTextarea.value = data.markdown;

        const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
        const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);
        renderChunkPreview(data.markdown, chunkSize, chunkOverlap, 'ingestion-chunk-preview-host', 'ingestion-chunk-count');
      }).catch(err => {
        contentTextarea.value = `Fetch error: ${escapeHtml(err.message)}`;
      });
  };

  loadBtn.addEventListener('click', loadUrlContent);
  rechunkBtn.addEventListener('click', () => {
    const content = contentTextarea.value;
    if (!content) { alert('Load URL content first.'); return; }
    const chunkSize = parseInt(document.getElementById('ingestion-chunk-size').value);
    const chunkOverlap = parseInt(document.getElementById('ingestion-chunk-overlap').value);
    renderChunkPreview(content, chunkSize, chunkOverlap, 'ingestion-chunk-preview-host', 'ingestion-chunk-count');
    rechunkBtn.classList.remove('needs-update');
  });

  contentTextarea.addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
  document.getElementById('ingestion-chunk-size').addEventListener('input', () => rechunkBtn.classList.add('needs-update'));
  document.getElementById('ingestion-chunk-overlap').addEventListener('input', () => rechunkBtn.classList.add('needs-update'));

  if (initialUrl) {
    loadUrlContent();
  }
}
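// Runs a DuckDuckGo-backed web search via the /search endpoint and renders the results
// as chat messages; each result exposes a .read-url-btn that hands its URL to
// handleReadUrlAndChunking for ingestion.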
function handleWebSearch() {
  showModal({
    title: "Search the Web",
    text: "Enter your search query to do a DuckDuckGo-based web search:",
    // NOTE: the original input markup was elided in this dump; reconstructed minimally.
    contentHTML: `<input type="text" id="web-search-input" placeholder="Search query..." />`,
    onSubmit: () => {
      const query = document.getElementById("web-search-input").value.trim();
      if (!query) {
        alert("No query provided");
        return;
      }
      hideModal();
      addUserMessage(`web_search ${query}`);
      setThinking(true);

      fetch("/search", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ query, num_results: 5 }),
      })
        .then((r) => r.json())
        .then((data) => {
          if (data.error) {
            addBotMessage({ content: `Web search error: ${data.error}` });
          } else if (data.results && data.results.length > 0) {
            const resultsHtml = data.results.map((r) => {
              const isValidUrl = r.href && (r.href.startsWith('http://') || r.href.startsWith('https://'));
              const url = isValidUrl ? r.href : '#';
              let host = 'N/A';
              if (isValidUrl) {
                try {
                  host = new URL(url).hostname;
                } catch (e) { console.error('Failed to parse URL', e); }
              }

              // NOTE: the original result-card markup was elided in this dump. The markup
              // below is a minimal reconstruction; the .read-url-btn button and its
              // data-url attribute are required by the click handler wired up further down.
              return `
                <div>
                  <a href="${url}" target="_blank" rel="noopener">${escapeHtml(r.title)}</a>
                  <button type="button" class="read-url-btn" data-url="${url}">Read URL</button>
                  <p>${escapeHtml(r.body)}</p>
                  <span>${host}</span>
                </div>
              `;
            }).join('');

            // NOTE: wrapper markup elided in this dump; only the heading text is recoverable.
            addBotMessage({ content: `<div>Web Search Results:</div>${resultsHtml}` });

            document.querySelectorAll('.read-url-btn').forEach(button => {
              button.addEventListener('click', (e) => {
                const url = e.target.closest('button').getAttribute('data-url');
                if (url && url !== '#') {
                  handleReadUrlAndChunking(url);
                }
              });
            });

          } else {
            addBotMessage({ content: "No web search results found." });
          }
        })
        .catch((err) => {
          addBotMessage({ content: `Web search error: ${err.message}` });
        })
        .finally(() => {
          setThinking(false);
        });
    },
  });
}
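// "Preview RAG context" button: sends the current query to /preview_search and shows the
// retrieved chunks (score, source, content) that would be used as context, filtered
// client-side by the minimum-score slider.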
previewRagBtn.addEventListener("click", () => {
  const text = userInput.value.trim();
  if (!text) {
    alert("Type your query in the box first.");
    return;
  }
  const embeddingModel = embeddingModelSelector.value;
  const numSources = parseInt(numSourcesInput.value) || 3;
  const minScore = parseFloat(minScoreInput.value) || 0;
  fetch("/preview_search", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      query: text,
      session_id: currentSessionId,
      embedding_model: embeddingModel,
      num_sources: numSources,
    }),
  })
    .then((r) => r.json())
    .then((data) => {
      if (data.error) {
        alert(`Preview error: ${data.error}`);
        return;
      }
      const filteredData = data.filter(res => res.score >= minScore);
      if (!Array.isArray(filteredData) || filteredData.length === 0) {
        alert("No results found for the given query and minimum score.");
        return;
      }
      let previewContent = filteredData
        .map((res, idx) => {
          return `(${idx + 1}) Score: ${res.score.toFixed(4)} | Source: ${res.source}\n${res.content}\n---\n`;
        })
        .join("");
      showModal({
        title: "RAG Context Preview",
        text: "Retrieved chunks for your current query:",
        // NOTE: the wrapper markup was elided in this dump; a plain <pre> is assumed here.
        contentHTML: `<pre>${escapeHtml(previewContent)}</pre>`,
      });
    })
    .catch((err) => {
      alert(`Preview request failed: ${err.message}`);
    });
});

minScoreInput.addEventListener("input", () => {
  minScoreValue.textContent = parseFloat(minScoreInput.value).toFixed(2);
});
maxCharsInput.addEventListener("input", () => {
  maxCharsValue.textContent = parseInt(maxCharsInput.value);
});
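// Polls /ingest/status/<task_id> every 2 seconds until the background ingestion task
// reports 'complete' or 'failed', then refreshes the session/source state on success.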
function pollIngestionTask(taskId) {
  const checkStatus = () => {
    fetch(`/ingest/status/${taskId}`)
      .then(r => r.json())
      .then(data => {
        if (data.status === 'complete') {
          addSystemMessage(`Ingestion successful! ${data.message}`);
          loadSessionsAndState();
        } else if (data.status === 'failed') {
          addSystemMessage(`Ingestion failed: ${data.message}`);
        } else {
          setTimeout(checkStatus, 2000);
        }
      })
      .catch(err => {
        addSystemMessage(`Failed to get ingestion status: ${err.message}`);
      });
  };
  addSystemMessage(`Ingestion started with Task ID: ${taskId}. This may take a moment.`);
  setTimeout(checkStatus, 2000);
}
--------------------------------------------------------------------------------