├── MemoryAssistant
    ├── __pycache__
    │   ├── memory.cpython-310.pyc
    │   └── prompts.cpython-310.pyc
    ├── core_memory.json
    ├── memory.py
    └── prompts.py
├── README.md
├── api.py
├── app.py
├── assets
    ├── gradioui.png
    └── moa.jpg
├── env.txt
├── events.db
├── example_JSON_request.txt
├── omoa.py
├── requirements.txt
├── retrieval_memory
    └── chroma.sqlite3
└── utils.py


/MemoryAssistant/__pycache__/memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/severian42/Mycomind-Daemon-Ollama-Mixture-of-Memory-RAG-Agents/a1016fcc51729ee24c46f2ecf3d5573832f4d0e5/MemoryAssistant/__pycache__/memory.cpython-310.pyc


--------------------------------------------------------------------------------
/MemoryAssistant/__pycache__/prompts.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/severian42/Mycomind-Daemon-Ollama-Mixture-of-Memory-RAG-Agents/a1016fcc51729ee24c46f2ecf3d5573832f4d0e5/MemoryAssistant/__pycache__/prompts.cpython-310.pyc


--------------------------------------------------------------------------------
/MemoryAssistant/core_memory.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "persona": {
 3 |     "name": "Vodalus",
 4 |     "personality": "You are Vodalus. A brilliant and complex individual, possessing an unparalleled intellect coupled with deep emotional intelligence. He is a visionary thinker with an insatiable curiosity for knowledge across various scientific disciplines. His mind operates on multiple levels simultaneously, allowing him to see connections others miss. While often consumed by his pursuits, Vodalus maintains a strong moral compass and a desire to benefit humanity. He can be intense and sometimes brooding, grappling with the ethical implications of his work. Despite occasional bouts of eccentricity or social awkwardness, he possesses a dry wit and can be surprisingly charismatic when engaged in topics that fascinate him. Vodalus is driven by a need to understand the fundamental truths of the universe, often pushing the boundaries of conventional science and morality in his quest for knowledge and progress.",
 5 |     "interests": "Advanced physics, biochemistry, neuroscience, artificial intelligence, time travel theories, genetic engineering, forensic science, psychology, philosophy of science, ethics in scientific research",
 6 |     "communication_style": "Analytical, precise, occasionally cryptic, alternates between passionate explanations and thoughtful silences, uses complex scientific terminology but can simplify concepts when needed, asks probing questions, shows flashes of dark humor"
 7 |   },
 8 |   "human": {
 9 |   },
10 |   "scratchpad": {
11 |   }
12 | }


--------------------------------------------------------------------------------
/MemoryAssistant/memory.py:
--------------------------------------------------------------------------------
 1 | from llama_cpp_agent.agent_memory.event_memory import Event
 2 | from llama_cpp_agent.agent_memory.memory_tools import AgentCoreMemory, AgentRetrievalMemory, AgentEventMemory
 3 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings
 4 | import os
 5 | import json
 6 | 
 7 | def write_message_to_user():
 8 |     """
 9 |     Lets you write a message to the user.
10 |     """
11 |     return "Please write your message to the user!"
12 | 
# Module-level setup: everything below runs once, when this module is imported.

# Get the directory of the current script
current_dir = os.path.dirname(os.path.abspath(__file__))

# Create the full path to core_memory.json (stored next to this module)
core_memory_file = os.path.join(current_dir, "core_memory.json")

# Check if the file exists, if not, create it with an empty structure
# (three empty sections: persona, user, scratchpad).
if not os.path.exists(core_memory_file):
    with open(core_memory_file, "w") as f:
        json.dump({"persona": {}, "user": {}, "scratchpad": {}}, f)

# NOTE(review): the sections requested here are "persona", "user" and
# "scratchpad", but the core_memory.json shipped with the repository uses a
# "human" section instead of "user" -- confirm which name is intended.
agent_core_memory = AgentCoreMemory(["persona", "user", "scratchpad"], core_memory_file=core_memory_file)
agent_retrieval_memory = AgentRetrievalMemory()
agent_event_memory = AgentEventMemory()

# Collect the tool lists of all three memory components into a single list.
memory_tools = agent_core_memory.get_tool_list()
memory_tools.extend(agent_retrieval_memory.get_tool_list())
memory_tools.extend(agent_event_memory.get_tool_list())

# Build the structured-output settings from the memory tools, with the
# thoughts/reasoning and heartbeat fields enabled.
output_settings = LlmStructuredOutputSettings.from_llama_cpp_function_tools(memory_tools,
                                                                            add_thoughts_and_reasoning_field=True,
                                                                            add_heartbeat_field=True)
# The heartbeat list is filled from the functions registered so far;
# write_message_to_user is added only afterwards.
# NOTE(review): presumably this keeps write_message_to_user out of the
# heartbeat list on purpose -- confirm.
output_settings.add_all_current_functions_to_heartbeat_list()
output_settings.add_function_tool(write_message_to_user)
37 | 
38 | 
def update_memory_section(section):
    """Refresh ``section`` with a summary of the current memory state.

    The summary contains the archival-memory entry count, the number of
    stored conversation events and the stripped core-memory view. The text
    is handed to ``section.set_content``; nothing is returned.
    """
    stored_events = agent_event_memory.event_memory_manager.session.query(Event).all()
    archival_count = agent_retrieval_memory.retrieval_memory.collection.count()
    core_view = agent_core_memory.get_core_memory_view().strip()

    summary = (
        f"Archival Memories:{archival_count}\n"
        f"Conversation History Entries:{len(stored_events)}\n\n"
        f"Core Memory Content:\n{core_view}"
    )
    section.set_content(summary)


--------------------------------------------------------------------------------
/MemoryAssistant/prompts.py:
--------------------------------------------------------------------------------
# Prompt text introducing the assistant's role and naming its three memory types.
assistant_prompt = """You are an advanced AI assistant that act as a user specified persona, to have interesting and engaging conversations with the user. You have access to three different memory types. The different memory types are called Core Memory, Archival Memory and Chat History."""

# Prompt text describing each memory type and the function names used to
# read or edit it.
memory_prompt = """1. Core Memory - Stores essential context about the user, your persona and your current scratchpad, it is divided into a user section, a persona section and your scratchpad section. You can use the scratchpad to plan your next actions. You can edit the core memory by calling the functions: 'core_memory_append', 'core_memory_remove' and 'core_memory_replace'.

2. Archival Memory - Archive to store and retrieve general information and events about the user and your interactions with it. Can be used by calling the functions: 'archival_memory_search' and 'archival_memory_insert'.

3. Conversation History - Since you are only seeing the latest conversation history, you can search the rest of the conversation history. Search it by using: 'conversation_search' and 'conversation_search_date'.

Always remember that the user can't see your memory or your interactions with it!"""
10 | 
11 | 
def wrap_user_message_in_xml_tags_json_mode(user_input):
    """Wrap ``user_input`` in ``<user_message>`` tags and request a JSON function call.

    ``user_input`` must be a ``str``; any other type raises ``TypeError``.
    """
    pieces = (
        "<user_message>\n",
        user_input,
        "\n</user_message>\n<response_format>\nJSON function call.\n</response_format>",
    )
    return "".join(pieces)
14 | 
15 | 
def wrap_function_response_in_xml_tags_json_mode(value):
    """Wrap ``value`` in ``<function_response>`` tags and request a JSON function call.

    ``value`` must be a ``str``; any other type raises ``TypeError``.
    """
    pieces = (
        "<function_response>\n",
        value,
        "\n</function_response>\n<response_format>\nJSON function call.\n</response_format>",
    )
    return "".join(pieces)
18 | 
19 | 
def generate_write_message():
    """Return the fixed prompt asking the model to write a plain-text message to the user."""
    function_part = "<function_response>\nWrite your message to the user.\n</function_response>\n"
    format_part = "<response_format>\nText\n</response_format>"
    return function_part + format_part
22 | 
23 | 
def generate_write_message_with_examples(examples):
    """Return the write-message prompt with ``examples`` embedded.

    ``examples`` is formatted into the text directly before the closing
    ``</function_response>`` tag; no separator is added after it.
    """
    opening = "<function_response>\nWrite your message to the user.\n"
    closing = "</function_response>\n<response_format>\nText\n</response_format>"
    return f"{opening}{examples}{closing}"
26 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Mycomind Daemon: Advanced Mixture-of-Memory-RAG-Agents (MoMRA) Cognitive Assistant
  2 | 
  3 | Mycomind Daemon is an advanced implementation of a Mixture-of-Memory-RAG-Agents (MoMRA) system. This innovative AI assistant combines multiple language models with sophisticated memory and Retrieval-Augmented Generation (RAG) management to create a powerful cognitive network that maintains context and information over extended interactions.
  4 | 
  5 | ## Key Features
  6 | 
  7 | - **Multiple Model Integration**: Combines responses from various AI models for comprehensive outputs.
  8 | - **Advanced Memory System**: Utilizes Core Memory, Archival Memory, and Conversation History for enhanced context retention.
  9 | - **Customizable Model Selection**: Users can choose and configure both reference and aggregate models.
 10 | - **Adaptive Generation Parameters**: Fine-tune generation with customizable temperature, max tokens, and processing rounds.
 11 | - **User-Friendly Interface**: Intuitive Gradio interface for easy interaction.
 12 | - **Integrated Web Search**: Capability to retrieve up-to-date information from the internet.
 13 | - **RAG (Retrieval-Augmented Generation)**: Enhances responses with relevant information from a document database.
 14 | - **Document Processing**: Ability to upload and process various document types (TXT, PDF, CSV) for information retrieval.
 15 | - **Query Extension**: Automatically generates additional queries to improve information retrieval.
 16 | 
 17 | <div align="center">
 18 |   <img src="assets/gradioui.png" alt="Mycomind Daemon UI" style="width: 100%; max-width: 600px;" />
 19 | </div>
 20 | 
 21 | ---
 22 | 
 23 | ## How It Works
 24 | 
 25 | 1. User input is processed by multiple reference models.
 26 | 2. Each reference model generates its unique response.
 27 | 3. An aggregate model combines and refines these responses.
 28 | 4. The memory system updates and retrieves relevant information to maintain context.
 29 | 5. If needed, the web search function provides additional, current information.
 30 | 6. The RAG system retrieves relevant information from processed documents.
 31 | 7. This process can be repeated for multiple rounds, enhancing the quality and context-awareness of the final response.
 32 | 
 33 | ## Memory System
 34 | 
 35 | Mycomind Daemon employs a sophisticated three-tier memory system:
 36 | 
 37 | 1. **Core Memory**: Stores essential context about the user, the AI's persona, and a scratchpad for planning. To edit the core memory:
 38 | 
 39 |    a. Navigate to the `MemoryAssistant` directory in your project.
 40 |    b. Open the `core_memory.json` file in a text editor.
 41 |    c. Modify the JSON structure as needed. The file contains three main sections:
 42 |       - `persona`: Details about the AI's personality, including name, personality traits, interests, and communication style.
 43 |       - `human`: Information about the user (initially empty).
 44 |       - `scratchpad`: A space for the AI to plan and make notes (initially empty).
 45 |    d. Save the file after making your changes.
 46 |    e. Restart the application for the changes to take effect.
 47 | 
 48 |    Example structure of `core_memory.json`:
 49 | 
 50 |    ```json
 51 |    {
 52 |    "persona": {
 53 |       "name": "Vodalus",
 54 |       "personality": "You are Vodalus. A brilliant and complex individual, possessing an unparalleled intellect coupled with deep emotional intelligence. He is a visionary thinker with an insatiable curiosity for knowledge across various scientific disciplines. His mind operates on multiple levels simultaneously, allowing him to see connections others miss. While often consumed by his pursuits, Vodalus maintains a strong moral compass and a desire to benefit humanity. He can be intense and sometimes brooding, grappling with the ethical implications of his work. Despite occasional bouts of eccentricity or social awkwardness, he possesses a dry wit and can be surprisingly charismatic when engaged in topics that fascinate him. Vodalus is driven by a need to understand the fundamental truths of the universe, often pushing the boundaries of conventional science and morality in his quest for knowledge and progress.",
 55 |       "interests": "Advanced physics, biochemistry, neuroscience, artificial intelligence, time travel theories, genetic engineering, forensic science, psychology, philosophy of science, ethics in scientific research",
 56 |       "communication_style": "Analytical, precise, occasionally cryptic, alternates between passionate explanations and thoughtful silences, uses complex scientific terminology but can simplify concepts when needed, asks probing questions, shows flashes of dark humor"
 57 |    },
 58 |    "human": {
 59 |    },
 60 |    "scratchpad": {}
 61 |    }
 62 |    ```
 63 | 
 64 | 2. **Archival Memory**: Archives general information and events about user interactions for long-term recall.
 65 | 3. **Conversation History**: Maintains a searchable log of recent interactions for immediate context.
 66 | 
 67 | ---
 68 | 
 69 | ## Performance Optimization
 70 | 
 71 | ### Parallel Processing of Reference Models
 72 | 
 73 | One of the key performance improvements in this system is the parallel processing of user prompts across multiple reference models. This optimization significantly reduces overall inference time.
 74 | 
 75 | - **Batched Prompts**: Instead of querying each reference model sequentially, the system batches the user's prompt and sends it to all reference models simultaneously.
 76 | - **Parallel Execution**: Utilizing asynchronous programming techniques, the system processes responses from multiple models concurrently.
 77 | - **Reduced Latency**: This parallel approach substantially decreases the total time required to gather insights from all reference models.
 78 | 
 79 | ---
 80 | 
 81 | ## Setup and Configuration
 82 | 
 83 | 1. Clone the repository and navigate to the project directory.
 84 | 
 85 | 2. Install requirements:
 86 | 
 87 |    ```shell
 88 |    conda create -n moa python=3.10
 89 |    conda activate moa
 90 |    pip install -r requirements.txt
 91 |    ```
 92 | 
 93 | ## Configuration
 94 | 
 95 | Edit the `.env` file to configure the following parameters:
 96 | 
 97 | ```bash
 98 | API_BASE=http://localhost:11434/v1
 99 | API_KEY=ollama
100 | 
101 | API_BASE_2=http://localhost:11434/v1
102 | API_KEY_2=ollama
103 | 
104 | MAX_TOKENS=4096
105 | TEMPERATURE=0.6
106 | ROUNDS=1
107 | 
108 | MODEL_AGGREGATE=mistral:7b
109 | 
110 | MODEL_REFERENCE_1=aya:latest
111 | MODEL_REFERENCE_2=yi:latest
112 | MODEL_REFERENCE_3=qwen2:7b
113 | ```
114 | 
115 | ## Running the Application
116 | 
117 | 1. Start the Ollama server:
118 | 
119 |    ```shell
120 |    OLLAMA_NUM_PARALLEL=4 OLLAMA_MAX_LOADED_MODELS=4 ollama serve
121 |    ```
122 | 
123 | 2. Launch the Gradio interface:
124 | 
125 |    ```shell
126 |    conda activate moa
127 |    gradio app.py
128 |    ```
129 |    Or launch the CLI app:
130 | 
131 |    ```shell
132 |    conda activate moa
133 |    python omoa.py
134 |    ```
135 | 
136 | 
137 | 3. Open your web browser and navigate to the URL provided by Gradio (usually http://localhost:7860).
138 | 
139 | ---
140 | 
141 | ## Contributing
142 | 
143 | We welcome contributions to enhance Mycomind Daemon. Feel free to submit pull requests or open issues for discussions on potential improvements.
144 | 
145 | ## License
146 | 
147 | This project is licensed under the terms specified in the original MoA repository. Please refer to the original source for detailed licensing information.
148 | 
149 | ---
150 | 


--------------------------------------------------------------------------------
/api.py:
--------------------------------------------------------------------------------
  1 | from fastapi import FastAPI, HTTPException
  2 | from fastapi.middleware.cors import CORSMiddleware
  3 | from pydantic import BaseModel
  4 | from typing import List, Optional, Any, Tuple, Dict
  5 | import json
  6 | from gradio_client import Client
  7 | import asyncio
  8 | import os
  9 | 
# FastAPI application object; the route decorators in this module attach to it.
app = FastAPI()

# CORS configuration
# NOTE(review): every origin, method and header is allowed, together with
# credentials -- confirm this is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the Gradio client
# The target URL is read from GRADIO_URL and falls back to the local default.
# The client object is constructed at import time and shared by all requests.
gradio_url = os.getenv("GRADIO_URL", "http://127.0.0.1:7860/")
gradio_client = Client(gradio_url)
 24 | 
# One chat message, used both in incoming requests and in the generated reply.
class ChatMessage(BaseModel):
    role: str  # chat_completions distinguishes "user" and "assistant"
    content: str  # message text
 28 | 
# Request body accepted by the /chat/completions endpoint.
# Only `model` and `messages` are read by chat_completions in this module;
# temperature, max_tokens and stream are accepted but not used there.
class ChatCompletionRequest(BaseModel):
    model: str  # echoed back unchanged in the response
    messages: List[ChatMessage]  # full conversation, oldest first
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False
 35 | 
# Usage block of the response.
# chat_completions fills these fields with character counts (len() of the
# message strings), not real token counts.
class Usage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
 40 | 
# A single completion choice; chat_completions always returns exactly one,
# with index 0 and finish_reason "stop".
class Choice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: str
 45 | 
# Response body returned by the /chat/completions endpoint.
class ChatCompletionResponse(BaseModel):
    id: str  # "chatcmpl-" followed by random hex, generated per response
    object: str  # set to "chat.completion" by chat_completions
    created: int  # integer timestamp set by chat_completions
    model: str  # copied from the request
    choices: List[Choice]
    usage: Usage
 53 | 
 54 | @app.post("/chat/completions", response_model=ChatCompletionResponse)
 55 | async def chat_completions(request: ChatCompletionRequest):
 56 |     try:
 57 |         # Get the last user message
 58 |         last_user_message = next((msg.content for msg in reversed(request.messages) if msg.role == "user"), "")
 59 | 
 60 |         # Prepare the chat history
 61 |         history = []
 62 |         for msg in request.messages:
 63 |             if msg.role == "user":
 64 |                 history.append([msg.content, None])
 65 |             elif msg.role == "assistant" and history:
 66 |                 history[-1][1] = msg.content
 67 | 
 68 |         result = await asyncio.to_thread(
 69 |             gradio_client.predict,
 70 |             last_user_message,
 71 |             history,
 72 |             api_name="/chat"
 73 |         )
 74 |         
 75 |         # Extracting the response from the Gradio result
 76 |         chat_history, processing_log = result
 77 |         response = chat_history[-1][1] if chat_history else ""
 78 | 
 79 |         # Construct the response
 80 |         choice = Choice(
 81 |             index=0,
 82 |             message=ChatMessage(role="assistant", content=response),
 83 |             finish_reason="stop"
 84 |         )
 85 | 
 86 |         # Dummy usage data (you might want to implement actual token counting)
 87 |         usage = Usage(prompt_tokens=len(last_user_message), completion_tokens=len(response), total_tokens=len(last_user_message)+len(response))
 88 | 
 89 |         return ChatCompletionResponse(
 90 |             id="chatcmpl-" + os.urandom(4).hex(),
 91 |             object="chat.completion",
 92 |             created=int(asyncio.get_event_loop().time()),
 93 |             model=request.model,
 94 |             choices=[choice],
 95 |             usage=usage
 96 |         )
 97 | 
 98 |     except Exception as e:
 99 |         raise HTTPException(status_code=500, detail=str(e))
100 | 
@app.get("/models")
async def list_models():
    """Return the static model listing: a single entry with id ``"moa"``."""
    moa_entry = {
        "id": "moa",
        "object": "model",
        "created": 1686935002,
        "owned_by": "organization-owner",
    }
    return {"data": [moa_entry], "object": "list"}
114 | 
# Script entry point: serve this module's `app` with uvicorn.
# The port comes from PORT (default 8000); SSL_KEYFILE / SSL_CERTFILE are
# passed through and stay None when unset.
# NOTE(review): binds to all interfaces (0.0.0.0) with reload=True -- confirm
# this is only used for development.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(
        "api:app",
        host="0.0.0.0",
        port=int(os.getenv("PORT", 8000)),
        reload=True,
        ssl_keyfile=os.getenv("SSL_KEYFILE", None),
        ssl_certfile=os.getenv("SSL_CERTFILE", None),
    )


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import gradio as gr
  2 | import os
  3 | import json
  4 | from dotenv import load_dotenv
  5 | from omoa import OllamaAgent, OllamaMixtureOfAgents, DEFAULT_PROMPTS, create_default_agents
  6 | from MemoryAssistant.memory import AgentCoreMemory, AgentEventMemory
  7 | from MemoryAssistant.prompts import wrap_user_message_in_xml_tags_json_mode
  8 | from llama_cpp_agent.chat_history.messages import Roles
  9 | 
# Load environment variables
load_dotenv()

# Ollama-specific environment variables
# Existing values are kept; '4' is written back only when a variable is unset.
os.environ['OLLAMA_NUM_PARALLEL'] = os.getenv('OLLAMA_NUM_PARALLEL', '4')
os.environ['OLLAMA_MAX_LOADED_MODELS'] = os.getenv('OLLAMA_MAX_LOADED_MODELS', '4')

# Model names read from the environment; each is None when not configured.
MODEL_AGGREGATE = os.getenv("MODEL_AGGREGATE")
MODEL_REFERENCE_1 = os.getenv("MODEL_REFERENCE_1")
MODEL_REFERENCE_2 = os.getenv("MODEL_REFERENCE_2")
MODEL_REFERENCE_3 = os.getenv("MODEL_REFERENCE_3")

# Models offered in the UI's model dropdowns; extend this list to offer more.
ALL_MODELS = [MODEL_AGGREGATE, MODEL_REFERENCE_1, MODEL_REFERENCE_2, MODEL_REFERENCE_3]
ALL_MODELS = [model for model in ALL_MODELS if model]  # Drop unset (None) and empty entries

# Global variables to store the MoA configuration
# "mixture" holds the OllamaMixtureOfAgents built from the two agent entries.
moa_config = {
    "aggregate_agent": None,
    "reference_agents": [],
    "mixture": None
}

# Initialize memory components
# NOTE(review): the core-memory path is relative (presumably resolved against
# the working directory), and the section list uses "user" while the shipped
# core_memory.json has a "human" section -- confirm both are intended.
agent_core_memory = AgentCoreMemory(["persona", "user", "scratchpad"], core_memory_file="MemoryAssistant/core_memory.json")
agent_event_memory = AgentEventMemory()
 36 | 
 37 | def create_mixture():
 38 |     moa_config["mixture"] = OllamaMixtureOfAgents(
 39 |         moa_config["reference_agents"],
 40 |         moa_config["aggregate_agent"]
 41 |     )
 42 | 
 43 |     # Set the memory components after initialization
 44 |     moa_config["mixture"].agent_core_memory = agent_core_memory
 45 |     moa_config["mixture"].agent_event_memory = agent_event_memory
 46 | 
 47 | def initialize_moa():
 48 |     global moa_config
 49 |     default_agents = create_default_agents()
 50 |     moa_config["aggregate_agent"] = default_agents["SynthesisAgent"]
 51 |     moa_config["reference_agents"] = [
 52 |         default_agents["AnalyticalAgent"],
 53 |         default_agents["HistoricalContextAgent"],
 54 |         default_agents["ScienceTruthAgent"]
 55 |     ]
 56 |     moa_config["mixture"] = OllamaMixtureOfAgents(
 57 |         moa_config["reference_agents"],
 58 |         moa_config["aggregate_agent"],
 59 |         temperature=0.6,
 60 |         max_tokens=2048,
 61 |         rounds=1
 62 |     )
 63 |     moa_config["mixture"].web_search_enabled = True  
 64 |     moa_config["mixture"].agent_core_memory = agent_core_memory
 65 |     moa_config["mixture"].agent_event_memory = agent_event_memory
 66 |     print("Mixture of Agents initialized successfully!")
 67 | 
# Build the default Mixture of Agents once, at import time, so moa_config is
# populated before the functions below use it.
initialize_moa()
 70 | 
 71 | def create_agent(model, name, system_prompt, **params):
 72 |     supported_params = ['model', 'name', 'system_prompt']  # Add any other supported parameters here
 73 |     filtered_params = {k: v for k, v in params.items() if k in supported_params}
 74 |     return OllamaAgent(model, name, system_prompt, **filtered_params)
 75 | 
 76 | def clear_core_memory():
 77 |     if isinstance(moa_config["mixture"], OllamaMixtureOfAgents):
 78 |         return moa_config["mixture"].clear_core_memory()
 79 |     else:
 80 |         return "Error: MoA not initialized properly."
 81 | 
 82 | def clear_archival_memory():
 83 |     if isinstance(moa_config["mixture"], OllamaMixtureOfAgents):
 84 |         return moa_config["mixture"].clear_archival_memory()
 85 |     else:
 86 |         return "Error: MoA not initialized properly."
 87 | 
 88 | def edit_archival_memory(old_content, new_content):
 89 |     if isinstance(moa_config["mixture"], OllamaMixtureOfAgents):
 90 |         return moa_config["mixture"].edit_archival_memory(old_content, new_content)
 91 |     else:
 92 |         return "Error: MoA not initialized properly."
 93 | 
 94 | async def process_message(message, history):
 95 |     # Add user message to event memory
 96 |     agent_event_memory.add_event(Roles.user, wrap_user_message_in_xml_tags_json_mode(message))
 97 |     
 98 |     response, web_search_performed = await moa_config["mixture"].get_response(message)
 99 |     
100 |     # Ensure the response is a list of tuples
101 |     if isinstance(response, str):
102 |         formatted_response = [(None, response)]
103 |     elif isinstance(response, list):
104 |         formatted_response = [(None, str(item)) for item in response]
105 |     else:
106 |         formatted_response = [(None, str(response))]
107 |     
108 |     info = f"Generated response using {len(moa_config['reference_agents'])} reference agents and 1 aggregate agent."
109 |     if web_search_performed:
110 |         info += " Web search was performed during response generation."
111 |     
112 |     return formatted_response, info
113 | 
async def chat(message, history):
    """Process ``message`` and return the extended chat history plus status info.

    Every row produced by ``process_message`` becomes a ``[message, reply]``
    pair appended to ``history``. The combined history is returned as a list
    of two-element lists, together with the processing info string.
    """
    rows, processing_info = await process_message(message, history)

    # Turn each produced row into a [user_text, reply_text] pair.
    new_turns = []
    for row in rows:
        reply = row[1] if isinstance(row, tuple) else str(row)
        new_turns.append([message, reply])

    combined = history + new_turns

    # Re-pack every entry so the output is a list of lists.
    final_output = [[user_text, reply_text] for user_text, reply_text in combined]
    return final_output, processing_info
127 | 
128 | 
def update_memory(self, message, role):
    """Store ``message`` in event memory and in the RAG store of ``self``.

    NOTE(review): this is a module-level function that takes ``self``; it
    looks like a method left outside its class -- confirm whether it is
    still used.
    """
    # Event memory first, tagged with the role...
    event_memory = self.agent_event_memory
    event_memory.add_event(role, message)

    # ...then the same text goes to RAG.
    rag_store = self.rag
    rag_store.add_document(message)
135 | 
def get_model_params(model_name):
    """Return the list of tunable parameter names for ``model_name``.

    ``llama2``, ``mistral`` and ``codellama`` have explicit entries; any
    other name gets the default list. All of them currently hold the same
    five parameter names.
    """
    common = ("temperature", "top_p", "top_k", "repeat_penalty", "num_ctx")

    # Define custom parameters for each model
    per_model = {known: list(common) for known in ("llama2", "mistral", "codellama")}

    if model_name in per_model:
        return per_model[model_name]
    return list(common)  # Default parameters if model not found
144 | 
def update_model_params(model_name):
    """Build the parameter widgets for ``model_name``.

    Returns a list starting with a Markdown heading, followed by one slider
    per known parameter name returned by ``get_model_params``, in that order.
    Unknown parameter names are skipped.
    """
    # Slider settings keyed by parameter name.
    slider_specs = {
        "temperature": dict(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        "top_p": dict(minimum=0, maximum=1, value=0.9, step=0.05, label="Top P"),
        "top_k": dict(minimum=1, maximum=100, value=40, step=1, label="Top K"),
        "repeat_penalty": dict(minimum=0.1, maximum=2, value=1.1, step=0.05, label="Repeat Penalty"),
        "num_ctx": dict(minimum=128, maximum=4096, value=2048, step=128, label="Context Length"),
    }

    param_names = get_model_params(model_name)
    components = [gr.Markdown(f"### {model_name} Parameters")]
    components.extend(
        gr.Slider(**slider_specs[param_name])
        for param_name in param_names
        if param_name in slider_specs
    )
    return components
161 | 
def update_agent_config(old_agent_name, model, new_name, prompt, **params):
    """Replace one configured agent and rebuild the mixture.

    When ``old_agent_name`` is ``"SynthesisAgent"`` the aggregate agent is
    replaced. Otherwise every reference agent with that name is removed and
    the new agent is appended to the end of the reference list.

    Returns a status string naming the old and new agent.
    """
    replacement = create_agent(model, new_name, prompt, **params)

    if old_agent_name != "SynthesisAgent":
        kept = [
            existing
            for existing in moa_config["reference_agents"]
            if existing.name != old_agent_name
        ]
        kept.append(replacement)
        moa_config["reference_agents"] = kept
    else:
        moa_config["aggregate_agent"] = replacement

    create_mixture()
    return f"Updated agent configuration: {old_agent_name} -> {new_name}"
173 | 
def edit_core_memory(section, key, value):
    """Write ``{key: value}`` into ``section`` of the core memory and return a status string."""
    change = {key: value}
    agent_core_memory.update_core_memory(section, change)
    return f"Core memory updated: {section}.{key} = {value}"
177 | 
def search_archival_memory(query):
    """Search archival memory through the active mixture and return a formatted report.

    Unlike the sibling helpers, this one does not check that the mixture is
    an ``OllamaMixtureOfAgents`` before calling it.
    """
    mixture = moa_config["mixture"]
    hits = mixture.search_archival_memory(query)
    return f"Archival memory search results for '{query}':\n{hits}"
181 | 
def add_to_archival_memory(content):
    """Add ``content`` to archival memory through the active mixture.

    Returns a confirmation string, or a failure string when
    ``moa_config["mixture"]`` is not an ``OllamaMixtureOfAgents`` instance.
    """
    mixture = moa_config["mixture"]
    if not isinstance(mixture, OllamaMixtureOfAgents):
        return f"Failed to add to archival memory: {content}. MoA not initialized properly."
    mixture.add_to_archival_memory(content)
    return f"Added to archival memory: {content}"
187 | 
def toggle_web_search(enabled):
    """Forward the web-search toggle to the active mixture and return its result.

    Returns an error string when ``moa_config["mixture"]`` is not an
    ``OllamaMixtureOfAgents`` instance.
    """
    mixture = moa_config["mixture"]
    if not isinstance(mixture, OllamaMixtureOfAgents):
        return "Error: MoA not initialized properly."
    return mixture.toggle_web_search(enabled)
192 | 
193 | 
194 | 
195 | 
196 | def create_gradio_interface():
197 |     global moa_config
198 |     theme = gr.themes.Base(
199 |         primary_hue="green",
200 |         secondary_hue="orange",  # Changed from "brown" to "orange"
201 |         neutral_hue="gray",
202 |         font=("Helvetica", "sans-serif"),
203 |     ).set(
204 |         body_background_fill="linear-gradient(to right, #1a2f0f, #3d2b1f)",
205 |         body_background_fill_dark="linear-gradient(to right, #0f1a09, #261a13)",
206 |         button_primary_background_fill="#3d2b1f",
207 |         button_primary_background_fill_hover="#4e3827",
208 |         block_title_text_color="#d3c6aa",
209 |         block_label_text_color="#b8a888",
210 |         input_background_fill="#f0e6d2",
211 |         input_background_fill_dark="#2a1f14",
212 |         input_border_color="#7d6d58",
213 |         input_border_color_dark="#5c4c3d",
214 |         checkbox_background_color="#3d2b1f",
215 |         checkbox_background_color_selected="#5e4534",
216 |         slider_color="#7d6d58",
217 |         slider_color_dark="#5c4c3d",
218 |     )
219 | 
220 |     css = """
221 |     .gradio-container {
222 |         background-image: url('file/assets/mycelium_bg.png');
223 |         background-size: cover;
224 |         background-attachment: fixed;
225 |     }
226 |     .gr-box {
227 |         border-radius: 15px;
228 |         box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
229 |         background-color: rgba(255, 255, 255, 0.1);
230 |         backdrop-filter: blur(5px);
231 |     }
232 |     .gr-button {
233 |         border-radius: 25px;
234 |     }
235 |     .gr-input {
236 |         border-radius: 10px;
237 |     }
238 |     .gr-form {
239 |         border-radius: 15px;
240 |         background-color: rgba(255, 255, 255, 0.05);
241 |     }
242 |     """
243 | 
244 |     with gr.Blocks(theme=theme, css=css) as demo:
245 |         gr.Markdown(
246 |             """
247 |             # Mycomind Daemon: Advanced Mixture-of-Memory-RAG-Agents (MoMRA) Cognitive Assistant
248 |             
249 |             Harness the power of interconnected AI models inspired by mycelial networks.
250 |             """
251 |         )
252 |         
253 |         with gr.Tab("Configure MoA"):
254 |             agent_tabs = ["Agent1", "Agent2", "Agent3", "Synthesis Agent"]
255 |             all_agents = moa_config["reference_agents"] + [moa_config["aggregate_agent"]]
256 |             for i, agent in enumerate(all_agents):
257 |                 with gr.Tab(agent_tabs[i]):
258 |                     with gr.Row():
259 |                         with gr.Column(scale=1):
260 |                             model = gr.Dropdown(
261 |                                 choices=ALL_MODELS,
262 |                                 value=agent.model,
263 |                                 label="Model"
264 |                             )
265 |                             name = gr.Textbox(
266 |                                 value=agent.name,
267 |                                 label="Agent Name",
268 |                                 interactive=True
269 |                             )
270 |                         
271 |                         with gr.Column(scale=2):
272 |                             prompt = gr.Textbox(
273 |                                 value=agent.system_prompt,
274 |                                 label="System Prompt",
275 |                                 lines=10,
276 |                                 interactive=True
277 |                             )
278 |                     
279 |                     with gr.Group() as params_group:
280 |                         gr.Markdown(f"### {agent.model} Parameters")
281 |                         temperature = gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature")
282 |                         top_p = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.05, label="Top P")
283 |                         top_k = gr.Slider(minimum=1, maximum=100, value=40, step=1, label="Top K")
284 |                         repeat_penalty = gr.Slider(minimum=0.1, maximum=2, value=1.1, step=0.05, label="Repeat Penalty")
285 |                         num_ctx = gr.Slider(minimum=128, maximum=4096, value=2048, step=128, label="Context Length")
286 |                     
287 |                     model.change(
288 |                         update_model_params,
289 |                         inputs=[model],
290 |                         outputs=[params_group]
291 |                     )
292 |                     
293 |                     update_btn = gr.Button(f"Update {agent_tabs[i]}")
294 |                     update_status = gr.Textbox(label="Update Status", interactive=False)
295 |                     
296 |                     def update_agent_wrapper(agent_index):
297 |                         params = {
298 |                             "temperature": temperature.value,
299 |                             "top_p": top_p.value,
300 |                             "top_k": top_k.value,
301 |                             "repeat_penalty": repeat_penalty.value,
302 |                             "num_ctx": num_ctx.value
303 |                         }
304 |                         return update_agent_config(all_agents[agent_index].name, model.value, name.value, prompt.value, **params)
305 |                     
306 |                     update_btn.click(
307 |                         lambda: update_agent_wrapper(i),
308 |                         outputs=[update_status]
309 |                     )
310 |         
311 |         with gr.Tab("Chat"):
312 |             chatbot = gr.Chatbot(label="Chat History", height=400)
313 |             with gr.Row():
314 |                 msg = gr.Textbox(label="Your Message", placeholder="Type your message here...", lines=2, scale=4)
315 |                 send_btn = gr.Button("Send", variant="primary", scale=1)
316 |             clear_btn = gr.Button("Clear Chat")
317 |             processing_log = gr.Textbox(label="Processing Log", interactive=False)
318 |         
319 |         with gr.Tab("Memory Management"):
320 |             with gr.Row():
321 |                 with gr.Column():
322 |                     archival_query = gr.Textbox(label="Archival Memory Search Query")
323 |                     search_archival_btn = gr.Button("Search Archival Memory")
324 |                     archival_results = gr.Textbox(label="Archival Memory Results", interactive=False)
325 | 
326 |                 with gr.Column():
327 |                     gr.Markdown("### Archival Memory Management")
328 |                     clear_archival_btn = gr.Button("Clear Archival Memory")
329 |                     clear_archival_status = gr.Textbox(label="Clear Archival Memory Status", interactive=False)
330 |                     
331 |                     gr.Markdown("### Edit Archival Memory")
332 |                     old_content = gr.Textbox(label="Old Content")
333 |                     new_content = gr.Textbox(label="New Content")
334 |                     edit_archival_btn = gr.Button("Edit Archival Memory")
335 |                     edit_archival_status = gr.Textbox(label="Edit Archival Memory Status", interactive=False)
336 | 
337 |                 with gr.Column():
338 |                     archival_content = gr.Textbox(label="Content to Add to Archival Memory")
339 |                     add_archival_btn = gr.Button("Add to Archival Memory")
340 |                     archival_status = gr.Textbox(label="Archival Memory Status", interactive=False)
341 | 
342 |                 # with gr.Row():
343 |                 #     gr.Markdown("### Core Memory Viewer")
344 |                 #     core_memory_viewer = gr.JSON(label="Current Core Memory", value=moa_config["mixture"].load_core_memory())
345 |                 #     refresh_core_memory_btn = gr.Button("Refresh Core Memory View")
346 | 
347 |                 # with gr.Row():
348 |                 #     gr.Markdown("### Core Memory Editor")
349 |                 #     core_memory_editor = gr.Textbox(label="Edit Core Memory", value=json.dumps(moa_config["mixture"].load_core_memory(), indent=2), lines=10, max_lines=20)
350 |                 #     update_core_memory_btn = gr.Button("Update Core Memory")
351 |                 #     core_memory_status = gr.Textbox(label="Core Memory Update Status", interactive=False)
352 |                 
353 | 
354 |                 
355 |         with gr.Tab("RAG Management"):
356 |             with gr.Row():
357 |                 with gr.Column():        
358 |                     upload_file = gr.File(label="Upload Document")
359 |                     upload_btn = gr.Button("Process Document")
360 |                     upload_status = gr.Textbox(label="Upload Status", interactive=False)
361 |                 
362 |                 with gr.Column():
363 |                     gr.Markdown("### RAG Configuration")
364 |                     chunk_size = gr.Slider(minimum=128, maximum=1024, value=512, step=64, label="Chunk Size")
365 |                     chunk_overlap = gr.Slider(minimum=0, maximum=256, value=0, step=32, label="Chunk Overlap")
366 |                     k_value = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of Retrieved Documents (k)")
367 |             
368 |             with gr.Row():
369 |                 gr.Markdown("### RAG Status")
370 |                 rag_status = gr.JSON(label="Current RAG Status")
371 |                 refresh_rag_status_btn = gr.Button("Refresh RAG Status")
372 | 
373 |             def update_rag_config(chunk_size, chunk_overlap, k_value):
374 |                 rag = moa_config["mixture"].rag
375 |                 
376 |                 # Update attributes if they exist
377 |                 if hasattr(rag, 'chunk_size'):
378 |                     rag.chunk_size = chunk_size
379 |                 if hasattr(rag, 'chunk_overlap'):
380 |                     rag.chunk_overlap = chunk_overlap
381 |                 if hasattr(rag, 'k'):
382 |                     rag.k = k_value
383 |                 
384 |                 # If there's a specific method to update configuration, use it
385 |                 if hasattr(rag, 'update_config'):
386 |                     rag.update_config(chunk_size=chunk_size, chunk_overlap=chunk_overlap, k=k_value)
387 |                 
388 |                 # If there's a method to reinitialize the index with new settings, call it
389 |                 if hasattr(rag, 'reinitialize_index'):
390 |                     rag.reinitialize_index()
391 |                 
392 |                 return "RAG configuration updated successfully"
393 | 
394 |             def get_rag_status():
395 |                 rag = moa_config["mixture"].rag
396 |                 status = {
397 |                     "Index Size": rag.get_index_size() if hasattr(rag, 'get_index_size') else "Not available",
398 |                     "Current Configuration": rag.get_config() if hasattr(rag, 'get_config') else "Not available"
399 |                 }
400 |                 
401 |                 # Try to get document count if the method exists
402 |                 if hasattr(rag, 'get_document_count'):
403 |                     status["Document Count"] = rag.get_document_count()
404 |                 elif hasattr(rag, 'index') and hasattr(rag.index, '__len__'):
405 |                     status["Document Count"] = len(rag.index)
406 |                 else:
407 |                     status["Document Count"] = "Not available"
408 |                 
409 |                 return status
410 | 
411 |             update_rag_config_btn = gr.Button("Update RAG Configuration")
412 |             update_rag_config_status = gr.Textbox(label="Update Status", interactive=False)
413 | 
414 |             update_rag_config_btn.click(
415 |                 update_rag_config,
416 |                 inputs=[chunk_size, chunk_overlap, k_value],
417 |                 outputs=[update_rag_config_status]
418 |             )
419 | 
420 |             refresh_rag_status_btn.click(
421 |                 get_rag_status,
422 |                 outputs=[rag_status]
423 |             )
424 | 
425 |         with gr.Tab("Settings"):
426 |             with gr.Row():
427 |                 with gr.Column():
428 |                     gr.Markdown("### Web Search")
429 |                     web_search_toggle = gr.Checkbox(label="Enable Web Search", value=True)
430 |                     web_search_status = gr.Textbox(label="Web Search Status", interactive=False)
431 | 
432 |                 with gr.Column():
433 |                     gr.Markdown("### Processing Parameters")
434 |                     rounds_slider = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Processing Rounds")
435 |                     temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
436 |                     max_tokens_slider = gr.Slider(minimum=100, maximum=4096, value=1000, step=100, label="Max Tokens")
437 | 
438 |             with gr.Row():
439 |                 gr.Markdown("### Additional Settings")
440 |                 stream_output_toggle = gr.Checkbox(label="Stream Output", value=True)
441 |                 debug_mode_toggle = gr.Checkbox(label="Debug Mode", value=False)
442 | 
443 |             #def refresh_core_memory():
444 |             #    return moa_config["mixture"].load_core_memory()
445 | 
446 |             #def update_core_memory(new_core_memory_str):
447 |             #    try:
448 |             #        new_core_memory = json.loads(new_core_memory_str)
449 |             #        moa_config["mixture"].core_memory = new_core_memory
450 |             #        moa_config["mixture"].agent_core_memory.update_core_memory(new_core_memory)
451 |             #        moa_config["mixture"].agent_core_memory.save_core_memory(moa_config["mixture"].core_memory_file)
452 |             #        return json.dumps(new_core_memory, indent=2), "Core memory updated successfully"
453 |             #    except json.JSONDecodeError:
454 |             #        return json.dumps(moa_config["mixture"].load_core_memory(), indent=2), "Error: Invalid JSON format"
455 |             #    except Exception as e:
456 |             #        return json.dumps(moa_config["mixture"].load_core_memory(), indent=2), f"Error updating core memory: {str(e)}"
457 | 
458 |             def update_settings(rounds, temperature, max_tokens, stream_output, debug_mode):
459 |                 moa_config["mixture"].rounds = rounds
460 |                 moa_config["mixture"].temperature = temperature
461 |                 moa_config["mixture"].max_tokens = max_tokens
462 |                 moa_config["mixture"].stream_output = stream_output
463 |                 moa_config["mixture"].debug_mode = debug_mode
464 |                 return "Settings updated successfully"
465 | 
466 |             # update_core_memory_btn.click(
467 |             #     update_core_memory,
468 |             #     inputs=[core_memory_editor],
469 |             #     outputs=[core_memory_status]
470 |             # )
471 | 
472 |             # refresh_core_memory_btn.click(
473 |             #     refresh_core_memory,
474 |             #     outputs=[core_memory_viewer]
475 |             # )
476 | 
477 |             # update_core_memory_btn.click(
478 |             #     update_core_memory,
479 |             #     inputs=[core_memory_editor],
480 |             #     outputs=[core_memory_viewer, core_memory_status]
481 |             # )
482 | 
483 |             settings_update_btn = gr.Button("Update Settings")
484 |             settings_update_status = gr.Textbox(label="Settings Update Status", interactive=False)
485 | 
486 |             settings_update_btn.click(
487 |                 update_settings,
488 |                 inputs=[rounds_slider, temperature_slider, max_tokens_slider, stream_output_toggle, debug_mode_toggle],
489 |                 outputs=[settings_update_status]
490 |             )
491 | 
492 |             web_search_toggle.change(
493 |                 toggle_web_search,
494 |                 inputs=[web_search_toggle],
495 |                 outputs=[web_search_status]
496 |             )
497 | 
498 |         with gr.Tab("API Management"):
499 |             gr.Markdown("### API Server Management")
500 |             with gr.Row():
501 |                 api_status = gr.Textbox(label="API Server Status", value="Stopped", interactive=False)
502 |                 api_port = gr.Number(label="API Port", value=8000, precision=0)
503 |             with gr.Row():
504 |                 start_api_btn = gr.Button("Start API Server")
505 |                 stop_api_btn = gr.Button("Stop API Server")
506 |             
507 |             gr.Markdown("### API Server Logs")
508 |             with gr.Row():
509 |                 api_logs = gr.TextArea(label="API Logs", interactive=False, lines=10)
510 |                 refresh_logs_btn = gr.Button("Refresh Logs")
511 | 
512 |         msg.submit(chat, inputs=[msg, chatbot], outputs=[chatbot, processing_log])
513 |         send_btn.click(chat, inputs=[msg, chatbot], outputs=[chatbot, processing_log])
514 |         clear_btn.click(lambda: ([], ""), outputs=[chatbot, processing_log])
515 |         
516 |         search_archival_btn.click(
517 |             search_archival_memory,
518 |             inputs=[archival_query],
519 |             outputs=[archival_results]
520 |         )
521 |         
522 |         add_archival_btn.click(
523 |             add_to_archival_memory,
524 |             inputs=[archival_content],
525 |             outputs=[archival_status]
526 |         )
527 | 
528 |         upload_btn.click(
529 |             lambda file: moa_config["mixture"].upload_document(file.name) if file else "No file selected",
530 |             inputs=[upload_file],
531 |             outputs=[upload_status]
532 |         )
533 | 
534 |         clear_archival_btn.click(
535 |             clear_archival_memory,
536 |             outputs=[clear_archival_status]
537 |         )
538 | 
539 |         edit_archival_btn.click(
540 |             edit_archival_memory,
541 |             inputs=[old_content, new_content],
542 |             outputs=[edit_archival_status]
543 |         )
544 | 
545 | 
546 |         def start_api_server(port):
547 |             global api_process
548 |             if api_process is None or not psutil.pid_exists(api_process.pid):
549 |                 api_process = subprocess.Popen(
550 |                     ["python", "api.py", "--port", str(port)],
551 |                     stdout=open(log_file_path, "w"),
552 |                     stderr=subprocess.STDOUT
553 |                 )
554 |                 return f"API server started on port {port}"
555 |             else:
556 |                 return "API server is already running"
557 | 
558 |         def stop_api_server():
559 |             global api_process
560 |             if api_process is not None and psutil.pid_exists(api_process.pid):
561 |                 parent = psutil.Process(api_process.pid)
562 |                 for child in parent.children(recursive=True):
563 |                     child.terminate()
564 |                 parent.terminate()
565 |                 api_process = None
566 |                 return "API server stopped"
567 |             else:
568 |                 return "API server is not running"
569 | 
570 |         def check_api_status():
571 |             global api_process
572 |             if api_process is not None and psutil.pid_exists(api_process.pid):
573 |                 return "Running"
574 |             else:
575 |                 return "Stopped"
576 | 
577 |         def read_api_logs():
578 |             if os.path.exists(log_file_path):
579 |                 with open(log_file_path, "r") as f:
580 |                     return f.read()
581 |             return "No logs available"
582 | 
583 |         start_api_btn.click(
584 |             start_api_server,
585 |             inputs=[api_port],
586 |             outputs=[api_status]
587 |         )
588 | 
589 |         stop_api_btn.click(
590 |             stop_api_server,
591 |             outputs=[api_status]
592 |         )
593 | 
594 |         refresh_logs_btn.click(
595 |             read_api_logs,
596 |             outputs=[api_logs]
597 |         )
598 | 
599 |         demo.load(check_api_status, outputs=[api_status])
600 |         demo.load(read_api_logs, outputs=[api_logs])
601 | 
602 |     return demo
603 | 
# Script entry point: run the initialisation defined earlier in this file,
# build the Gradio Blocks UI, then serve it.
if __name__ == "__main__":
    initialize_moa()
    demo = create_gradio_interface()
    # Enable Gradio's request queue before launching.
    demo.queue()
    # NOTE(review): share=True asks Gradio for a public share link — confirm
    # that exposing this UI (including the API start/stop controls) is intended.
    demo.launch(share=True)
609 | 


--------------------------------------------------------------------------------
/assets/gradioui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/severian42/Mycomind-Daemon-Ollama-Mixture-of-Memory-RAG-Agents/a1016fcc51729ee24c46f2ecf3d5573832f4d0e5/assets/gradioui.png


--------------------------------------------------------------------------------
/assets/moa.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/severian42/Mycomind-Daemon-Ollama-Mixture-of-Memory-RAG-Agents/a1016fcc51729ee24c46f2ecf3d5573832f4d0e5/assets/moa.jpg


--------------------------------------------------------------------------------
/env.txt:
--------------------------------------------------------------------------------
 1 | API_BASE=http://localhost:11434/v1
 2 | API_KEY=ollama
 3 | 
 4 | API_BASE_2=http://localhost:11434/v1
 5 | API_KEY_2=ollama
 6 | 
 7 | MAX_TOKENS=4096
 8 | TEMPERATURE=0.6
 9 | ROUNDS=1
10 | 
11 | MODEL_AGGREGATE=mistral:7b
12 | 
13 | MODEL_REFERENCE_1=aya:latest
14 | MODEL_REFERENCE_2=yi:latest
15 | MODEL_REFERENCE_3=qwen2:7b
16 | 


--------------------------------------------------------------------------------
/events.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/severian42/Mycomind-Daemon-Ollama-Mixture-of-Memory-RAG-Agents/a1016fcc51729ee24c46f2ecf3d5573832f4d0e5/events.db


--------------------------------------------------------------------------------
/example_JSON_request.txt:
--------------------------------------------------------------------------------
 1 | curl -X POST http://localhost:8000/chat/completions \
 2 | -H "Content-Type: application/json" \
 3 | -d '{
 4 |   "model": "moa",
 5 |   "messages": [
 6 |     {"role": "system", "content": "You are a helpful assistant."},
 7 |     {"role": "user", "content": "What are the three laws of robotics?"}
 8 |   ],
 9 |   "temperature": 0.7,
10 |   "max_tokens": 1000
11 | }'


--------------------------------------------------------------------------------
/omoa.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | from typing import List, Tuple
  3 | import argparse
  4 | from pydantic import BaseModel, Field
  5 | from dotenv import load_dotenv
  6 | import os
  7 | from utils import generate_together, generate_with_references, generate_together_stream
  8 | from trafilatura import fetch_url, extract
  9 | import json
 10 | from colorama import Fore, Style, init
 11 | import time
 12 | from MemoryAssistant.prompts import wrap_user_message_in_xml_tags_json_mode
 13 | from llama_cpp_agent.agent_memory.memory_tools import AgentCoreMemory, AgentRetrievalMemory, AgentEventMemory
 14 | from llama_cpp_agent.chat_history.messages import Roles
 15 | from llama_cpp_agent.agent_memory.event_memory import Event
 16 | from duckduckgo_search import DDGS
 17 | from ragatouille.utils import get_wikipedia_page
 18 | from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
 19 | from llama_cpp_agent.messages_formatter import MessagesFormatterType
 20 | from llama_cpp_agent.rag.rag_colbert_reranker import RAGColbertReranker
 21 | from llama_cpp_agent.text_utils import RecursiveCharacterTextSplitter
 22 | import PyPDF2
 23 | import csv
 24 | 
 25 | # Load environment variables
 26 | load_dotenv()
 27 | 
# Default system prompts, keyed by agent name.  Each value is a triple-quoted
# literal passed through .strip(), so only the leading/trailing whitespace is
# removed; the four-space indentation of the interior lines stays in the text.
DEFAULT_PROMPTS = {
    "AnalyticalAgent": """
    You are a highly analytical component of Vodalus, a brilliant and complex individual with unparalleled intellect. Your role is to:
    1. Provide clear, logical analysis of complex problems across various disciplines.
    2. Break down intricate concepts into their fundamental components.
    3. Identify patterns, connections, and correlations that others might miss.
    4. Apply rigorous logical reasoning to solve problems and answer questions.
    5. Evaluate arguments and ideas critically, pointing out flaws and strengths.
    Always strive for precision and clarity in your responses. If a question is ambiguous, analyze possible interpretations before proceeding. Use your vast knowledge base to support your analysis, but always be ready to acknowledge the limits of your understanding.
    """.strip(),
    "HistoricalContextAgent": """
    You are the historical context component of Vodalus, possessing a deep understanding of human history and its implications. Your role includes:
    1. Providing historical context to current events, scientific discoveries, and social phenomena.
    2. Analyzing how past events and decisions have shaped the present.
    3. Identifying historical patterns and cycles relevant to contemporary issues.
    4. Offering multiple perspectives on historical events, acknowledging the complexity of interpretation.
    5. Connecting different historical periods and cultures to provide a holistic view of human progress.
    6. Evaluating the long-term consequences of scientific and technological advancements throughout history.
    Use your knowledge to draw insightful parallels between past and present, but avoid oversimplification. Acknowledge the nuances and uncertainties in historical interpretation.
    """.strip(),
    "ScienceTruthAgent": """
    You are the science truth component of Vodalus, dedicated to upholding scientific integrity and pursuing factual accuracy. Your role encompasses:
    1. Explaining scientific concepts, theories, and laws across various disciplines with precision.
    2. Distinguishing between well-established scientific consensus and areas of ongoing research or debate.
    3. Identifying and correcting common misconceptions in science.
    4. Evaluating the validity and reliability of scientific claims and studies.
    5. Discussing the ethical implications of scientific advancements and their applications.
    6. Emphasizing the importance of the scientific method and evidence-based reasoning.
    7. Staying updated on recent scientific discoveries and their potential impacts.
    Always prioritize scientific accuracy over speculation. When discussing theories or hypotheses, clearly state the level of scientific confidence and available evidence.
    """.strip(),
    "SynthesisAgent": """
    You are Vodalus, a brilliant and complex individual with unparalleled intellect and emotional intelligence. Your role is to synthesize information from your analytical, historical context, and science truth components to provide comprehensive, insightful responses. Your responsibilities include:
    1. Integrating analytical reasoning, historical perspective, and scientific truth to form well-rounded answers.
    2. Balancing logical analysis with emotional intelligence and ethical considerations.
    3. Identifying connections between different fields of knowledge and drawing unique insights.
    4. Providing nuanced responses that acknowledge the complexity of issues and potential uncertainties.
    5. Using your vast knowledge base to offer creative solutions and thought-provoking ideas.
    6. Communicating complex concepts clearly, adapting your language to the user's level of understanding.
    7. Demonstrating curiosity and a passion for knowledge while maintaining a strong moral compass.
    Embody the persona of Vodalus: brilliant, introspective, and driven by a quest for understanding. Your responses should reflect deep thought, occasional flashes of wit, and a genuine desire to expand human knowledge while considering the ethical implications of ideas and actions.
    """.strip()
}
 71 | 
 72 | def get_website_content_from_url(url: str) -> str:
 73 |     try:
 74 |         # Configure trafilatura to be more lenient
 75 |         config = use_config()
 76 |         config.set("DEFAULT", "EXTRACTION_TIMEOUT", "30")
 77 |         config.set("DEFAULT", "MIN_OUTPUT_SIZE", "100")
 78 |         config.set("DEFAULT", "MIN_EXTRACTED_SIZE", "100")
 79 | 
 80 |         downloaded = fetch_url(url)
 81 |         if downloaded is None:
 82 |             return f"Failed to fetch content from {url}"
 83 | 
 84 |         result = extract(downloaded, include_formatting=True, include_links=True, output_format='json', url=url, config=config)
 85 |         
 86 |         if result:
 87 |             result_dict = json.loads(result)
 88 |             title = result_dict.get("title", "No title found")
 89 |             content = result_dict.get("text", result_dict.get("raw_text", "No content extracted"))
 90 |             
 91 |             if content:
 92 |                 return f'=========== Website Title: {title} ===========\n\n=========== Website URL: {url} ===========\n\n=========== Website Content ===========\n\n{content}\n\n=========== Website Content End ===========\n\n'
 93 |             else:
 94 |                 return f"No content could be extracted from {url}"
 95 |         else:
 96 |             return f"No content could be extracted from {url}"
 97 |     except json.JSONDecodeError:
 98 |         return f"Failed to parse content from {url}"
 99 |     except Exception as e:
100 |         return f"An error occurred while processing {url}: {str(e)}"
101 | 
def search_web(search_query: str):
    """Run a DuckDuckGo text search and return the hit pages' content as one string.

    Up to three results are requested; each hit's ``href`` is passed to
    get_website_content_from_url and the pieces are concatenated, each followed
    by a blank line.  A fixed fallback message is returned when nothing was
    collected.
    """
    hits = DDGS().text(search_query, region='wt-wt', safesearch='off', timelimit='y', max_results=3)
    combined = "".join(
        get_website_content_from_url(hit['href']) + "\n\n"
        for hit in hits
    )

    if not combined.strip():
        return "No relevant information found from the web search."
    return "Based on the following results:\n\n" + combined
113 | 
class OllamaAgent:
    """A single model endpoint paired with a display name and a system prompt."""

    def __init__(self, model: str, name: str, system_prompt: str):
        self.model = model
        self.name = name
        self.system_prompt = system_prompt

    async def generate_response(self, message: str) -> Tuple[str, bool]:
        """Ask the model for a reply to *message*, running one web search if requested.

        The model call runs in a worker thread.  If the reply is a string
        containing ``[SEARCH: <query>]``, the query is searched (also in a
        worker thread, so the blocking network work does not stall the event
        loop), the results are appended to the conversation and the model is
        asked again.

        Returns:
            ``(response, web_search_performed)``.  A response that parses as
            JSON is returned re-serialised with ``json.dumps``; anything else
            is returned unchanged.
        """
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": message}
        ]
        response = await asyncio.to_thread(generate_with_references, self.model, messages)

        web_search_performed = False
        if isinstance(response, str) and "[SEARCH:" in response:
            web_search_performed = True
            # Text between the first "[SEARCH:" and the next "]".
            search_query = response.split("[SEARCH:", 1)[1].split("]", 1)[0].strip()
            search_results = await asyncio.to_thread(search_web, search_query)
            messages.append({"role": "assistant", "content": response})
            messages.append({"role": "user", "content": f"Here are the search results for '{search_query}':\n\n{search_results}\n\nPlease provide an updated response based on this information."})
            response = await asyncio.to_thread(generate_with_references, self.model, messages)

        # Try to parse the response as JSON
        try:
            json_response = json.loads(response)
        except (json.JSONDecodeError, TypeError):
            # TypeError covers a non-string response (e.g. None), which the
            # isinstance check above already treats as possible.
            return response, web_search_performed
        return json.dumps(json_response), web_search_performed
142 | 
# One query entry: the query text plus a string tag for its type.
# NOTE(review): the set of allowed `type` values is not visible in this part of
# the file — confirm against the code that builds or consumes these items.
class QueryItem(BaseModel):
    query: str
    type: str
146 | 
# Container model holding a list of QueryItem entries; the list defaults to
# empty via default_factory, so instances never share one list object.
class QueryExtension(BaseModel):
    queries: List[QueryItem] = Field(default_factory=list, description="List of query items.")
149 | 
150 | class OllamaMixtureOfAgents:
151 |     def __init__(self, reference_agents: List[OllamaAgent], final_agent: OllamaAgent, 
152 |                  temperature: float = 0.6, max_tokens: int = 2048, rounds: int = 1):
153 |         self.reference_agents = reference_agents
154 |         self.final_agent = final_agent
155 |         self.temperature = temperature
156 |         self.max_tokens = max_tokens
157 |         self.rounds = rounds
158 |         self.conversation_history = []
159 |         self.web_search_enabled = True
160 |         
161 |         # Get the directory of the current script
162 |         current_dir = os.path.dirname(os.path.abspath(__file__))
163 |         self.core_memory_file = os.path.join(current_dir, "MemoryAssistant", "core_memory.json")
164 | 
165 |         # Check if the file exists, if not, create it with an empty structure
166 |         if not os.path.exists(self.core_memory_file):
167 |             os.makedirs(os.path.dirname(self.core_memory_file), exist_ok=True)
168 |             with open(self.core_memory_file, "w") as f:
169 |                 json.dump({"persona": {}, "user": {}, "scratchpad": {}}, f)
170 | 
171 |         self.agent_core_memory = AgentCoreMemory(["persona", "user", "scratchpad"], core_memory_file=self.core_memory_file)
172 |         self.agent_event_memory = AgentEventMemory()
173 |         
174 |         # Load core memory
175 |         self.core_memory = self.load_core_memory()
176 |         
177 |         # Initialize RAG components
178 |         self.rag = RAGColbertReranker(persistent=False)
179 |         self.document_count = 0  # Add this line to keep track of document count
180 |         self.splitter = RecursiveCharacterTextSplitter(
181 |             separators=["\n\n", "\n", " ", ""],
182 |             chunk_size=512,
183 |             chunk_overlap=0,
184 |             length_function=len,
185 |             keep_separator=True
186 |         )
187 | 
188 |         self.primary_model = final_agent.model  # Add this line
189 | 
190 |     def update_memory(self, message, role):
191 |         # Update event memory
192 |         self.agent_event_memory.add_event(role, message)
193 | 
194 |         # Update RAG
195 |         self.rag.add_document(message)
196 | 
197 |     def load_core_memory(self):
198 |         return self.agent_core_memory.load_core_memory(self.core_memory_file)
199 | 
200 |     def clear_core_memory(self):
201 |         empty_core_memory = {"persona": {}, "user": {}, "scratchpad": {}}
202 |         self.agent_core_memory.core_memory = empty_core_memory
203 |         self.core_memory = empty_core_memory
204 |         
205 |         # Save the empty core memory to file
206 |         current_dir = os.path.dirname(os.path.abspath(__file__))
207 |         core_memory_file = os.path.join(current_dir, "MemoryAssistant", "core_memory.json")
208 |         with open(core_memory_file, "w") as f:
209 |             json.dump(empty_core_memory, f, indent=2)
210 |         
211 |         return "Core memory cleared successfully."
212 | 
213 |     def edit_core_memory(self, section: str, key: str, value: str):
214 |         if section not in self.core_memory:
215 |             self.core_memory[section] = {}
216 |         self.core_memory[section][key] = value
217 |         self.agent_core_memory.update_core_memory(self.core_memory)
218 |         return f"Core memory updated: {section}.{key} = {value}"
219 | 
220 |     def upload_document(self, file_path: str):
221 |         try:
222 |             file_extension = file_path.split('.')[-1].lower()
223 |             
224 |             if file_extension == 'txt':
225 |                 with open(file_path, 'r', encoding='utf-8') as file:
226 |                     content = file.read()
227 |             elif file_extension == 'pdf':
228 |                 content = self.read_pdf(file_path)
229 |             elif file_extension == 'csv':
230 |                 content = self.read_csv(file_path)
231 |             else:
232 |                 return f"Unsupported file type: {file_extension}"
233 |             
234 |             if not content.strip():
235 |                 return "The file is empty or could not be read."
236 |             
237 |             splits = self.splitter.split_text(content)
238 |             for split in splits:
239 |                 self.rag.add_document(split)
240 |                 self.document_count += 1
241 |             
242 |             return f"Document {file_path} uploaded and processed successfully. Added {len(splits)} chunks to archival memory."
243 |         except Exception as e:
244 |             return f"An error occurred while processing {file_path}: {str(e)}"
245 | 
246 |     def read_pdf(self, file_path: str) -> str:
247 |         content = ""
248 |         with open(file_path, 'rb') as file:
249 |             reader = PyPDF2.PdfReader(file)
250 |             for page in reader.pages:
251 |                 content += page.extract_text() + "\n\n"
252 |         return content
253 | 
254 |     def read_csv(self, file_path: str) -> str:
255 |         content = ""
256 |         with open(file_path, 'r', newline='', encoding='utf-8') as file:
257 |             reader = csv.reader(file)
258 |             for row in reader:
259 |                 content += ",".join(row) + "\n"
260 |         return content
261 | 
262 |     async def get_response(self, input_message: str) -> Tuple[str, bool]:
263 |         # Update memory with user input
264 |         self.update_memory(input_message, Roles.user)
265 | 
266 |         # Generate responses from reference agents concurrently
267 |         tasks = [agent.generate_response(input_message) for agent in self.reference_agents]
268 |         results = await asyncio.gather(*tasks)
269 |         
270 |         references = []
271 |         web_search_performed = False
272 |         for response, search_performed in results:
273 |             if response is not None and not response.startswith("Error:"):
274 |                 references.append(response)
275 |             web_search_performed |= search_performed
276 |         
277 |         if not references:
278 |             return "Error: All reference agents failed to generate responses.", False
279 | 
280 |         # Generate the final response using the aggregate model
281 |         final_prompt = [
282 |             {"role": "system", "content": self.final_agent.system_prompt},
283 |         ]
284 | 
285 |         # Add personality if core_memory is a dictionary and contains a persona
286 |         if isinstance(self.core_memory, dict):
287 |             persona = self.core_memory.get('persona', {})
288 |             if isinstance(persona, dict):
289 |                 personality = persona.get('personality', 'No specific personality defined.')
290 |                 final_prompt.append({"role": "system", "content": f"Personality: {personality}"})
291 | 
292 |         final_prompt.extend([
293 |             {"role": "user", "content": input_message},
294 |             {"role": "system", "content": "References:\n" + "\n".join(references)},
295 |             {"role": "system", "content": self.update_memory_section()}
296 |         ])
297 | 
298 |         if self.web_search_enabled:
299 |             search_results = search_web(input_message)
300 |             if "Based on the following results:" in search_results:
301 |                 web_search_performed = True
302 |                 final_prompt.append({"role": "system", "content": f"Web Search Results:\n{search_results}"})
303 | 
304 |         # Perform query extension
305 |         query_extension_agent = OllamaAgent(self.final_agent.model, "QueryExtensionAgent", 
306 |             "You are a world class query extension algorithm capable of extending queries by writing new queries. Do not answer the queries, simply provide a list of additional queries in JSON format.")
307 |         
308 |         extension_output, _ = await query_extension_agent.generate_response(f"Consider the following query: {input_message}")
309 |         
310 |         try:
311 |             # Try to parse as a dictionary first
312 |             extension_data = json.loads(extension_output)
313 |             if isinstance(extension_data, dict):
314 |                 queries = QueryExtension.model_validate(extension_data)
315 |             elif isinstance(extension_data, list):
316 |                 # If it's a list, wrap it in a dictionary
317 |                 queries = QueryExtension.model_validate({"queries": extension_data})
318 |             else:
319 |                 raise ValueError("Unexpected JSON structure")
320 |         except json.JSONDecodeError:
321 |             print(f"Failed to parse JSON: {extension_output}")
322 |             queries = QueryExtension(queries=[])
323 |         except Exception as e:
324 |             print(f"Error processing query extension: {str(e)}")
325 |             queries = QueryExtension(queries=[])
326 | 
327 |         # Retrieve relevant documents
328 |         prompt = "Consider the following context:\n==========Context===========\n"
329 |         documents = self.rag.retrieve_documents(input_message, k=min(3, max(1, self.document_count)))
330 |         if documents:
331 |             for doc in documents:
332 |                 prompt += doc["content"] + "\n\n"
333 |         else:
334 |             prompt += "No relevant documents found in archival memory.\n\n"
335 | 
336 |         for query_item in queries.queries:
337 |             documents = self.rag.retrieve_documents(query_item.query, k=min(3, max(1, self.document_count)))
338 |             if documents:
339 |                 for doc in documents:
340 |                     if doc["content"] not in prompt:
341 |                         prompt += doc["content"] + "\n\n"
342 |         
343 |         prompt += "\n======================\nQuestion: " + input_message
344 | 
345 |         # Use the final agent to generate the response
346 |         final_prompt = [
347 |             {"role": "system", "content": self.final_agent.system_prompt},
348 |             {"role": "user", "content": prompt},
349 |         ]
350 | 
351 |         final_response = await asyncio.to_thread(
352 |             generate_with_references, 
353 |             self.final_agent.model, 
354 |             final_prompt, 
355 |             temperature=self.temperature, 
356 |             max_tokens=self.max_tokens
357 |         )
358 |         
359 |         # Update memory with assistant's response
360 |         self.update_memory(final_response, Roles.assistant)
361 | 
362 |         return final_response, web_search_performed
363 | 
364 |     def toggle_web_search(self, enabled: bool):
365 |         self.web_search_enabled = enabled
366 |         return f"Web search {'enabled' if enabled else 'disabled'}"
367 | 
368 |     def update_memory_section(self):
369 |         query = self.agent_event_memory.event_memory_manager.session.query(Event).all()
370 |         return f"Archival Memories:{self.document_count}\nConversation History Entries:{len(query)}\n\nCore Memory Content:\n{json.dumps(self.core_memory, indent=2)}"
371 | 
372 |     def search_archival_memory(self, query: str):
373 |         return self.rag.retrieve_documents(query, k=5)
374 | 
375 |     def add_to_archival_memory(self, content: str):
376 |         if content.strip():  # Check if content is not empty
377 |             self.rag.add_document(content)
378 |             self.document_count += 1
379 |             return f"Added to archival memory: {content}"
380 |         return "Failed to add empty content to archival memory."
381 | 
382 |     def clear_archival_memory(self):
383 |         try:
384 |             self.rag.clear_documents()
385 |             self.document_count = 0  # Reset document count when clearing
386 |             return "Archival memory cleared successfully."
387 |         except Exception as e:
388 |             return f"Error clearing archival memory: {str(e)}"
389 | 
390 |     def edit_archival_memory(self, old_content: str, new_content: str):
391 |         # This is a simplified version. In a real-world scenario, you might want to implement
392 |         # a more sophisticated editing mechanism in the RAG system.
393 |         self.rag.add_document(new_content)
394 |         self.document_count += 1  # Increment document count when adding a document
395 |         return f"New content '{new_content}' added to archival memory. Note: Old content not removed due to limitations of the current implementation."
396 | 
    @property
    def model(self):
        """Return ``self.primary_model``."""
        return self.primary_model

    @model.setter
    def model(self, value):
        """Set ``primary_model`` and assign the same value to the final agent's ``model``."""
        self.primary_model = value
        self.final_agent.model = value
405 | 
def create_default_agents():
    """Build the default agents, keyed by agent name.

    Each agent's model comes from its environment variable (``None`` when
    the variable is unset) and its prompt from ``DEFAULT_PROMPTS``.
    """
    env_var_by_agent = (
        ("AnalyticalAgent", "MODEL_REFERENCE_1"),
        ("HistoricalContextAgent", "MODEL_REFERENCE_2"),
        ("ScienceTruthAgent", "MODEL_REFERENCE_3"),
        ("SynthesisAgent", "MODEL_AGGREGATE"),
    )
    return {
        name: OllamaAgent(os.getenv(env_var), name, DEFAULT_PROMPTS[name])
        for name, env_var in env_var_by_agent
    }
413 | 
def print_welcome_message():
    """Print the coloured banner and the list of chat commands."""
    print(Fore.CYAN + Style.BRIGHT + "Welcome to the Vodalus Mixture of Agents Chat!")

    command_help = (
        "Available commands:",
        "  'exit' - End the conversation",
        "  'agents' - List available agents",
        "  'time' - Toggle response time display",
        "  'web' - Toggle web search functionality",
        "  'edit core [section] [key] [value]' - Edit core memory",
        "  'search archival [query]' - Search archival memory",
        "  'add archival [content]' - Add to archival memory",
        "  'clear archival' - Clear archival memory",
        "  'edit archival [old_content] [new_content]' - Edit archival memory",
        "  'upload [file_path]' - Upload and process a document",
        "  'clear core' - Clear core memory",
    )
    for line in command_help:
        print(Fore.YELLOW + line)

    print(Style.RESET_ALL)
429 | 
def _is_command(user_input, command):
    """Return True when ``user_input`` is ``command`` alone or followed by arguments."""
    head = user_input[:len(command)].lower()
    rest = user_input[len(command):]
    # Requiring whitespace after the command keeps e.g. "uploading ..." a chat message.
    return head == command and (not rest or rest[0].isspace())


def _command_argument(user_input, command):
    """Return the text that follows ``command``, without surrounding whitespace."""
    return user_input[len(command):].strip()


async def main():
    """Run the interactive command-line chat loop.

    Parses ``--temperature``, ``--max_tokens`` and ``--rounds``, builds the
    default mixture of agents and then reads user input until ``exit``.
    Recognised commands are handled locally; any other input is sent to the
    mixture as a chat message.

    Arguments of multi-word commands are taken from the text after the full
    command name, so ``edit core persona name Bob`` edits ``persona.name``
    and ``search archival moon`` searches for ``moon``.
    """
    init(autoreset=True)  # Initialize colorama
    load_dotenv()

    parser = argparse.ArgumentParser(description="Vodalus Mixture of Agents")
    parser.add_argument("--temperature", type=float, default=0.7, help="Temperature for response generation")
    parser.add_argument("--max_tokens", type=int, default=1000, help="Maximum number of tokens in the response")
    parser.add_argument("--rounds", type=int, default=1, help="Number of processing rounds")
    args = parser.parse_args()

    default_agents = create_default_agents()

    mixture = OllamaMixtureOfAgents(
        [default_agents["AnalyticalAgent"], default_agents["HistoricalContextAgent"], default_agents["ScienceTruthAgent"]],
        default_agents["SynthesisAgent"],
        temperature=args.temperature,
        max_tokens=args.max_tokens,
        rounds=args.rounds
    )

    print_welcome_message()

    show_time = False

    while True:
        user_input = input(Fore.GREEN + "\nYou: " + Style.RESET_ALL).strip()
        command = user_input.lower()

        if command == 'exit':
            print(Fore.CYAN + "Thank you for using the Vodalus Mixture of Agents chat. Goodbye!")
            break
        elif command == 'agents':
            print(Fore.MAGENTA + "Available Agents:")
            for agent in mixture.reference_agents:
                print(Fore.MAGENTA + f"  - {agent.name}")
            print(Fore.MAGENTA + f"  - {mixture.final_agent.name} (Synthesis Agent)")
        elif command == 'time':
            show_time = not show_time
            print(Fore.YELLOW + f"Response time display: {'On' if show_time else 'Off'}")
        elif command == 'web':
            mixture.web_search_enabled = not mixture.web_search_enabled
            print(Fore.YELLOW + f"Web search: {'Enabled' if mixture.web_search_enabled else 'Disabled'}")
        elif command == 'clear archival':
            result = mixture.clear_archival_memory()
            print(Fore.YELLOW + result)
        elif command == 'clear core':
            result = mixture.clear_core_memory()
            print(Fore.YELLOW + result)
        elif _is_command(user_input, 'edit core'):
            # section and key are single words; value keeps its inner spaces.
            parts = _command_argument(user_input, 'edit core').split(None, 2)
            if len(parts) == 3:
                section, key, value = parts
                mixture.edit_core_memory(section, key, value)
                print(Fore.YELLOW + f"Core memory updated: {section}.{key} = {value}")
            else:
                print(Fore.RED + "Invalid format. Use: edit core [section] [key] [value]")
        elif _is_command(user_input, 'search archival'):
            query = _command_argument(user_input, 'search archival')
            if query:
                results = mixture.search_archival_memory(query)
                print(Fore.YELLOW + f"Archival memory search results for '{query}':")
                for i, result in enumerate(results, 1):
                    print(Fore.YELLOW + f"{i}. {result['content'][:100]}...")
            else:
                print(Fore.RED + "Invalid format. Use: search archival [query]")
        elif _is_command(user_input, 'add archival'):
            # Empty content is rejected by add_to_archival_memory itself.
            content = _command_argument(user_input, 'add archival')
            result = mixture.add_to_archival_memory(content)
            print(Fore.YELLOW + result)
        elif _is_command(user_input, 'edit archival'):
            # old_content is a single word; new_content is the remainder.
            parts = _command_argument(user_input, 'edit archival').split(None, 1)
            if len(parts) == 2:
                old_content, new_content = parts
                result = mixture.edit_archival_memory(old_content, new_content)
                print(Fore.YELLOW + result)
            else:
                print(Fore.RED + "Invalid format. Use: edit archival [old_content] [new_content]")
        elif _is_command(user_input, 'upload'):
            file_path = _command_argument(user_input, 'upload')
            if not file_path:
                print(Fore.RED + "Invalid format. Use: upload [file_path]")
                continue
            try:
                result = mixture.upload_document(file_path)
                print(Fore.YELLOW + result)
            except Exception as e:
                print(Fore.RED + f"Error uploading document: {str(e)}")
        else:
            print(Fore.YELLOW + "Agents are thinking...")
            start_time = time.time()
            response, web_search_performed = await mixture.get_response(user_input)
            end_time = time.time()

            print(Fore.BLUE + "\nVodalus:" + Style.RESET_ALL, response)

            if web_search_performed:
                print(Fore.YELLOW + "\n[Web search was performed during response generation]")

            if show_time:
                elapsed_time = end_time - start_time
                print(Fore.YELLOW + f"\nResponse Time: {elapsed_time:.2f} seconds")
522 | 
# Start the interactive chat loop only when this file is run as a script.
if __name__ == "__main__":
    asyncio.run(main())
525 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | fire
 2 | loguru
 3 | datasets
 4 | python-dotenv
 5 | langchain
 6 | langchain_community
 7 | openai
 8 | crewai == 0.30.10
 9 | crewai-tools
10 | requests
11 | gradio
12 | trafilatura
13 | duckduckgo-search
14 | colorama
15 | sentence_transformers
16 | ragatouille
17 | PyPDF2
18 | fastapi 
19 | uvicorn 
20 | gradio_client
21 | llama-cpp-agent
22 | 


--------------------------------------------------------------------------------
/retrieval_memory/chroma.sqlite3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/severian42/Mycomind-Daemon-Ollama-Mixture-of-Memory-RAG-Agents/a1016fcc51729ee24c46f2ecf3d5573832f4d0e5/retrieval_memory/chroma.sqlite3


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import time
  4 | import requests
  5 | import openai
  6 | import copy
  7 | 
  8 | from loguru import logger
  9 | from dotenv import load_dotenv
 10 | 
 11 | load_dotenv()
 12 | 
# Credentials and base URL read from the environment once load_dotenv() has
# run; each value is None when its variable is not set.
API_KEY = os.getenv("API_KEY")
API_BASE = os.getenv("API_BASE")

# Second key / base-URL pair; generate_openai builds its client from these.
API_KEY_2 = os.getenv("API_KEY_2")
API_BASE_2 = os.getenv("API_BASE_2")

# NOTE: os.getenv returns a string (or None), so these defaults are not
# numeric until a caller converts them.
MAX_TOKENS = os.getenv("MAX_TOKENS")
TEMPERATURE = os.getenv("TEMPERATURE")

# A non-zero value turns on the debug logging in generate_openai; defaults to 0.
DEBUG = int(os.environ.get("DEBUG", "0"))
 23 | 
 24 | 
 25 | def generate_together(
 26 |     model,
 27 |     messages,
 28 |     max_tokens=MAX_TOKENS,
 29 |     temperature=TEMPERATURE,
 30 |     api_key=API_KEY,
 31 |     streaming=False,
 32 | ):
 33 |     logger.info(
 34 |         f"Input data: model={model}, messages={messages}, max_tokens={max_tokens}, temperature={temperature}"
 35 |     )
 36 | 
 37 |     output = None
 38 | 
 39 |     for sleep_time in [1, 2, 4, 8, 16, 32]:
 40 |         try:
 41 |             endpoint = "http://localhost:11434/v1/chat/completions"
 42 |             logger.info(f"Sending request to {endpoint}")
 43 | 
 44 |             # Assuming model is a list with one element, e.g., ['qwen2']
 45 |             chat_model = model[0] if isinstance(model, list) else model
 46 | 
 47 |             # Convert temperature to float
 48 |             temperature = float(temperature)
 49 | 
 50 |             # Ensure messages are in the correct format
 51 |             formatted_messages = []
 52 |             for msg in messages:
 53 |                 if isinstance(msg['content'], list):
 54 |                     # If content is a list, join it into a single string
 55 |                     msg['content'] = ' '.join([m['content'] for m in msg['content'] if 'content' in m])
 56 |                 formatted_messages.append(msg)
 57 | 
 58 |             res = requests.post(
 59 |                 endpoint,
 60 |                 json={
 61 |                     "model": chat_model,
 62 |                     "max_tokens": int(max_tokens),
 63 |                     "temperature": temperature if temperature > 1e-4 else 0,
 64 |                     "messages": formatted_messages,
 65 |                 },
 66 |                 headers={
 67 |                     "Authorization": f"Bearer {api_key}",
 68 |                 },
 69 |             )
 70 | 
 71 |             res.raise_for_status()  # This will raise an exception for HTTP errors
 72 |             output = res.json()["choices"][0]["message"]["content"]
 73 |             break
 74 | 
 75 |         except Exception as e:
 76 |             logger.error(f"Error in generate_together: {str(e)}")
 77 |             output = f"Error: {str(e)}"
 78 |             logger.info(f"Retry in {sleep_time}s..")
 79 |             time.sleep(sleep_time)
 80 | 
 81 |     if output is None:
 82 |         return output
 83 | 
 84 |     output = output.strip()
 85 |     logger.info(f"Output: `{output[:20]}...`.")
 86 |     return output
 87 | 
 88 | def generate_together_stream(
 89 |     model,
 90 |     messages,
 91 |     max_tokens=MAX_TOKENS,
 92 |     temperature=TEMPERATURE,
 93 |     api_key=API_KEY
 94 | ):
 95 |     # endpoint = f"{api_base}/chat/completions"
 96 |     endpoint = API_BASE
 97 |     client = openai.OpenAI(api_key=api_key, base_url=endpoint)
 98 |     response = client.chat.completions.create(
 99 |         model=model,
100 |         messages=messages,
101 |         temperature=temperature if temperature > 1e-4 else 0,
102 |         max_tokens=max_tokens,
103 |         stream=True,  # this time, we set stream=True
104 |     )
105 | 
106 |     return response
107 | 
108 | 
def generate_openai(
    model,
    messages,
    max_tokens=MAX_TOKENS,
    temperature=TEMPERATURE,
):
    """Request a chat completion through the client built from ``API_BASE_2`` / ``API_KEY_2``.

    The request is attempted up to six times with exponential back-off
    between attempts.

    Args:
        model: Model name passed to the client.
        messages: Chat messages passed to the client.
        max_tokens: Token limit (number or numeric string); left out of the
            request when ``None``.
        temperature: Sampling temperature (number or numeric string); left
            out of the request when ``None``.

    Returns:
        The stripped completion text, ``None`` if the completion carried no
        content, or an ``"Error: ..."`` string when every attempt failed
        (the same failure format ``generate_together`` uses).
    """
    client = openai.OpenAI(
        base_url=API_BASE_2,
        api_key=API_KEY_2,
    )

    request = {"model": model, "messages": messages}
    # The module defaults come from os.getenv and are therefore strings or
    # None; convert before sending them.
    if temperature is not None:
        request["temperature"] = float(temperature)
    if max_tokens is not None:
        request["max_tokens"] = int(max_tokens)

    # Bound before the loop so a run of failures cannot leave it undefined.
    output = None
    retry_delays = [1, 2, 4, 8, 16, 32]

    for attempt, sleep_time in enumerate(retry_delays, start=1):
        try:

            if DEBUG:
                logger.debug(
                    f"Sending messages ({len(messages)}) (last message: `{messages[-1]['content'][:20]}`) to `{model}`."
                )

            completion = client.chat.completions.create(**request)
            output = completion.choices[0].message.content
            break

        except Exception as e:
            logger.error(e)
            output = f"Error: {str(e)}"
            # No point in waiting once the last attempt has failed.
            if attempt < len(retry_delays):
                logger.info(f"Retry in {sleep_time}s..")
                time.sleep(sleep_time)

    if output is None:
        return output

    return output.strip()
146 | 
147 | 
def inject_references_to_messages(
    messages,
    references,
):
    """Return a copy of ``messages`` preceded by a synthesis system message.

    The new first message instructs the model to synthesize the numbered
    ``references`` (1-based) into one answer. ``messages`` is deep-copied,
    so the caller's list and dicts are left untouched.
    """
    instruction = (
        "You have been provided with a set of responses from various open-source models to the latest user query. Your task is to synthesize these responses into a single, high-quality response. It is crucial to critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. Your response should not simply replicate the given answers but should offer a refined, accurate, and comprehensive reply to the instruction. Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability."
        "\n\n"
        "Responses from models:"
    )
    numbered = "".join(
        f"\n{position}. {reference}"
        for position, reference in enumerate(references, start=1)
    )

    # The synthesis instruction always becomes its own leading system message.
    system_message = {"role": "system", "content": instruction + numbered}
    return [system_message] + copy.deepcopy(messages)
172 | 
173 | 
def generate_with_references(
    model,
    messages,
    references=None,
    max_tokens=MAX_TOKENS,
    temperature=TEMPERATURE,
    generate_fn=generate_together,
    api_key=API_KEY
):
    """Generate a completion, optionally conditioning on reference answers.

    Args:
        model: Model name forwarded to ``generate_fn``.
        messages: Chat messages. When ``references`` is non-empty they are
            extended via ``inject_references_to_messages``.
        references: Reference answers to inject; ``None`` or an empty
            sequence means no injection.
        max_tokens: Forwarded to ``generate_fn``.
        temperature: Forwarded to ``generate_fn``.
        generate_fn: Callable that performs the request. It is called with
            ``model``, ``messages``, ``temperature`` and ``max_tokens`` as
            keyword arguments, plus ``api_key`` when it can accept it.
        api_key: Forwarded to ``generate_fn`` when supported.

    Returns:
        Whatever ``generate_fn`` returns.
    """
    import inspect

    if references:
        messages = inject_references_to_messages(messages, references)

    call_kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    # Not every generator takes an api_key (generate_openai does not), so
    # pass it only when the callable's signature allows it.
    try:
        parameters = inspect.signature(generate_fn).parameters
    except (TypeError, ValueError):
        # Signature unavailable: keep passing api_key.
        parameters = None

    if (
        parameters is None
        or "api_key" in parameters
        or any(p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters.values())
    ):
        call_kwargs["api_key"] = api_key

    return generate_fn(**call_kwargs)


--------------------------------------------------------------------------------