├── sct.png
├── memory_strategies
│   ├── base_memory.py
│   ├── __init__.py
│   ├── sequential_memory.py
│   ├── utils.py
│   ├── sliding_window_memory.py
│   ├── ai_agent.py
│   ├── os_memory.py
│   ├── hierarchical_memory.py
│   ├── memory_augmented_memory.py
│   ├── retrieval_memory.py
│   ├── summarization_memory.py
│   ├── graph_memory.py
│   └── compression_memory.py
├── requirements.txt
├── .gitignore
├── README.md
├── example_usage.py
├── LICENSE
├── api.py
└── streamlit_playground.py

/sct.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/sct.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
openai
numpy
faiss-cpu
networkx
tiktoken
python-dotenv
streamlit
fastapi
uvicorn
pydantic
plotly
--------------------------------------------------------------------------------
/memory_strategies/base_memory.py:
--------------------------------------------------------------------------------
"""
Base Memory Strategy Abstract Class

This module defines the abstract base class that all memory strategies must implement.
It ensures consistency and interchangeability between different memory optimization techniques.
"""

import abc
from typing import Any, Dict, List, Optional


class BaseMemoryStrategy(abc.ABC):
    """Abstract base class for all memory strategies."""

    @abc.abstractmethod
    def add_message(self, user_input: str, ai_response: str) -> None:
        """
        Add a new user-AI interaction to the memory storage.

        Args:
            user_input: The user's message
            ai_response: The AI's response
        """
        pass

    @abc.abstractmethod
    def get_context(self, query: str) -> str:
        """
        Retrieve and format relevant context from memory for the LLM.

        Args:
            query: The current user query to find relevant context for

        Returns:
            Formatted context string to send to the LLM
        """
        pass

    @abc.abstractmethod
    def clear(self) -> None:
        """
        Reset the memory storage, useful for starting new conversations.
        """
        pass

    def get_memory_stats(self) -> Dict[str, Any]:
        """
        Get statistics about the current memory usage.

        Returns:
            Dictionary containing memory statistics
        """
        return {
            "strategy_type": self.__class__.__name__,
            "memory_size": "Unknown"
        }
--------------------------------------------------------------------------------
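Because every strategy implements the same three abstract methods, a new technique can be dropped in without touching `AIAgent`. A minimal sketch of a hypothetical keyword-filter strategy, assuming the package and its dependencies are installed (the class name and the filtering rule are illustrative, not part of the repository):

```python
from typing import Any, Dict, List

from memory_strategies import BaseMemoryStrategy


class KeywordFilterMemory(BaseMemoryStrategy):
    """Hypothetical strategy: keep only turns that mention a watched keyword."""

    def __init__(self, keyword: str):
        self.keyword = keyword.lower()
        self.turns: List[str] = []

    def add_message(self, user_input: str, ai_response: str) -> None:
        turn = f"User: {user_input}\nAssistant: {ai_response}"
        # Store the turn only if either side mentions the keyword
        if self.keyword in turn.lower():
            self.turns.append(turn)

    def get_context(self, query: str) -> str:
        return "\n".join(self.turns) if self.turns else "No conversation history yet."

    def clear(self) -> None:
        self.turns = []

    def get_memory_stats(self) -> Dict[str, Any]:
        return {"strategy_type": self.__class__.__name__,
                "memory_size": f"{len(self.turns)} turns"}
```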
/memory_strategies/__init__.py:
--------------------------------------------------------------------------------
"""
Memory Strategies Package

This package contains 9 different memory optimization techniques for AI agents,
ranging from simple sequential storage to complex operating system-like memory management.
"""

from .base_memory import BaseMemoryStrategy
from .ai_agent import AIAgent
from .utils import generate_text, generate_embedding, count_tokens, get_openai_client

# Basic Memory Strategies
from .sequential_memory import SequentialMemory
from .sliding_window_memory import SlidingWindowMemory
from .summarization_memory import SummarizationMemory

# Advanced Memory Strategies
from .retrieval_memory import RetrievalMemory
from .memory_augmented_memory import MemoryAugmentedMemory
from .hierarchical_memory import HierarchicalMemory

# Complex Memory Strategies
from .graph_memory import GraphMemory
from .compression_memory import CompressionMemory
from .os_memory import OSMemory

__all__ = [
    # Base classes
    "BaseMemoryStrategy",
    "AIAgent",

    # Utilities
    "generate_text",
    "generate_embedding",
    "count_tokens",
    "get_openai_client",

    # Basic strategies
    "SequentialMemory",
    "SlidingWindowMemory",
    "SummarizationMemory",

    # Advanced strategies
    "RetrievalMemory",
    "MemoryAugmentedMemory",
    "HierarchicalMemory",

    # Complex strategies
    "GraphMemory",
    "CompressionMemory",
    "OSMemory"
]

# Strategy metadata for easy reference
STRATEGY_INFO = {
    "SequentialMemory": {
        "complexity": "Basic",
        "description": "Stores all conversation history chronologically",
        "best_for": "Simple, short-term chatbots"
    },
    "SlidingWindowMemory": {
        "complexity": "Basic",
        "description": "Maintains only the most recent N conversations",
        "best_for": "Controlled memory usage scenarios"
    },
    "SummarizationMemory": {
        "complexity": "Basic",
        "description": "Compresses conversation history using LLM summarization",
        "best_for": "Long-term creative conversations"
    },
    "RetrievalMemory": {
        "complexity": "Advanced",
        "description": "Uses vector embeddings and similarity search (RAG)",
        "best_for": "Accurate long-term recall, industry standard"
    },
    "MemoryAugmentedMemory": {
        "complexity": "Advanced",
        "description": "Combines sliding window with persistent memory tokens",
        "best_for": "Personal assistants requiring fact retention"
    },
    "HierarchicalMemory": {
        "complexity": "Advanced",
        "description": "Multi-layered system with working + long-term memory",
        "best_for": "Human-like cognitive patterns"
    },
    "GraphMemory": {
        "complexity": "Complex",
        "description": "Treats conversations as nodes with relationship edges",
        "best_for": "Expert systems and knowledge bases"
    },
    "CompressionMemory": {
        "complexity": "Complex",
        "description": "Intelligent compression and integration of historical data",
        "best_for": "Space-constrained environments"
    },
    "OSMemory": {
        "complexity": "Complex",
        "description": "Simulates RAM/disk with active/passive memory",
        "best_for": "Large-scale systems with unlimited memory needs"
    }
}
--------------------------------------------------------------------------------
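Since `STRATEGY_INFO` mirrors the exported class names, it can drive simple strategy discovery at runtime. A small sketch (no API key needed; the package's dependencies must be installed so the imports succeed):

```python
import memory_strategies
from memory_strategies import STRATEGY_INFO

# List every advanced strategy with its intended use case
for name, info in STRATEGY_INFO.items():
    if info["complexity"] == "Advanced":
        print(f"{name}: {info['best_for']}")

# Resolve a class object from its metadata key
cls = getattr(memory_strategies, "SlidingWindowMemory")
print(cls.__name__, "->", STRATEGY_INFO["SlidingWindowMemory"]["description"])
```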
/memory_strategies/sequential_memory.py:
--------------------------------------------------------------------------------
"""
Sequential Memory Strategy

This is the most basic memory strategy that stores the entire conversation
history in chronological order. While it provides perfect recall, it's not
scalable as the context grows linearly with each conversation turn.
"""

from typing import List, Dict, Any
from .base_memory import BaseMemoryStrategy
from .utils import count_tokens


class SequentialMemory(BaseMemoryStrategy):
    """
    Sequential memory strategy that stores all conversation history.

    Advantages:
    - Simple implementation
    - Perfect recall of all conversations
    - Complete context preservation

    Disadvantages:
    - Linear token growth with conversation length
    - Expensive for long conversations
    - May hit token limits quickly
    """

    def __init__(self):
        """Initialize memory with empty list to store conversation history."""
        self.history: List[Dict[str, str]] = []
        self.total_content_tokens = 0  # Track cumulative content token usage
        self.total_prompt_tokens = 0  # Track cumulative prompt tokens sent to LLM

    def add_message(self, user_input: str, ai_response: str) -> None:
        """
        Add new user-AI interaction to history.

        Each interaction is stored as two dictionary entries in the list.

        Args:
            user_input: User's message
            ai_response: AI's response
        """
        self.history.append({"role": "user", "content": user_input})
        self.history.append({"role": "assistant", "content": ai_response})

        # Update content token count (just the message content)
        self.total_content_tokens += count_tokens(user_input + ai_response)

    def get_context(self, query: str) -> str:
        """
        Retrieve entire conversation history formatted as a single string.

        The 'query' parameter is ignored since this strategy always
        returns the complete history.

        Args:
            query: Current user query (ignored in this strategy)

        Returns:
            Complete conversation history as formatted string
        """
        if not self.history:
            return "No conversation history yet."

        # Join all messages into a single string separated by newlines
        return "\n".join([
            f"{turn['role'].capitalize()}: {turn['content']}"
            for turn in self.history
        ])

    def clear(self) -> None:
        """Reset conversation history by clearing the list."""
        self.history = []
        self.total_content_tokens = 0
        self.total_prompt_tokens = 0
        print("Sequential memory cleared.")

    def get_memory_stats(self) -> Dict[str, Any]:
        """
        Get statistics about current memory usage.

        Returns:
            Dictionary containing memory statistics
        """
        total_messages = len(self.history)
        total_turns = total_messages // 2  # Each turn has user + assistant message

        return {
            "strategy_type": "SequentialMemory",
            "total_messages": total_messages,
            "total_turns": total_turns,
            "total_content_tokens": self.total_content_tokens,
            "total_prompt_tokens": self.total_prompt_tokens,
            "memory_size": f"{total_messages} messages",
            "advantages": ["Perfect recall", "Simple implementation"],
            "disadvantages": ["Linear token growth", "Not scalable"]
        }
--------------------------------------------------------------------------------
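`SequentialMemory` itself makes no API calls, so its growth pattern can be inspected offline (apart from tiktoken's one-time vocabulary download for `count_tokens`). A quick sketch showing how the stored token count climbs with each turn; the sample messages are made up:

```python
from memory_strategies import SequentialMemory

memory = SequentialMemory()
for i in range(3):
    memory.add_message(f"Question number {i}?", f"Answer number {i}.")
    stats = memory.get_memory_stats()
    print(stats["total_turns"], "turns,", stats["total_content_tokens"], "content tokens")

# get_context ignores the query and always returns the full transcript
print(memory.get_context(query="anything"))
```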
/memory_strategies/utils.py:
--------------------------------------------------------------------------------
"""
Utility Functions for Memory Strategies

This module provides core utility functions used across all memory strategies,
including text generation, embedding generation, and token counting.
"""

import os
import time
import tiktoken
from typing import List, Optional
from openai import OpenAI


# Initialize tokenizer for token counting
tokenizer = tiktoken.get_encoding("cl100k_base")

# Model configurations
GENERATION_MODEL = "gpt-4o-mini"
EMBEDDING_MODEL = "text-embedding-3-small"


def get_openai_client() -> OpenAI:
    """
    Initialize and return OpenAI client with API key from environment.

    Returns:
        Configured OpenAI client instance
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables")

    return OpenAI(api_key=api_key)


def generate_text(system_prompt: str, user_prompt: str, client: Optional[OpenAI] = None) -> str:
    """
    Generate text response using the LLM API.

    Args:
        system_prompt: System instructions defining AI role and behavior
        user_prompt: User input that AI should respond to
        client: Optional OpenAI client instance

    Returns:
        Generated text content from the AI
    """
    if client is None:
        client = get_openai_client()

    try:
        response = client.chat.completions.create(
            model=GENERATION_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.7,
            max_tokens=1000
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating text: {str(e)}"


def generate_embedding(text: str, client: Optional[OpenAI] = None) -> List[float]:
    """
    Generate embedding vector for given text using the embedding model.

    Args:
        text: Input text to convert to embedding vector
        client: Optional OpenAI client instance

    Returns:
        List of floats representing the embedding vector
    """
    if client is None:
        client = get_openai_client()

    try:
        response = client.embeddings.create(
            model=EMBEDDING_MODEL,
            input=text
        )
        return response.data[0].embedding
    except Exception as e:
        print(f"Error generating embedding: {str(e)}")
        return []


def count_tokens(text: str) -> int:
    """
    Count the number of tokens in the given text string.

    Args:
        text: String to tokenize and count

    Returns:
        Integer count of tokens
    """
    return len(tokenizer.encode(text))


def format_conversation_turn(user_input: str, ai_response: str) -> str:
    """
    Format a conversation turn into a standardized string format.

    Args:
        user_input: User's message
        ai_response: AI's response

    Returns:
        Formatted conversation turn string
    """
    return f"User: {user_input}\nAssistant: {ai_response}"


def measure_time(func):
    """
    Decorator to measure execution time of functions.

    Args:
        func: Function to measure

    Returns:
        Wrapper function that measures execution time
    """
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        execution_time = end_time - start_time
        print(f"[TIMING] {func.__name__} executed in {execution_time:.4f} seconds")
        return result
    return wrapper
--------------------------------------------------------------------------------
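The `measure_time` decorator is defined here but not applied anywhere in `utils.py` itself; it is meant to wrap whatever call you want to profile. A sketch combining it with `count_tokens` (no API key required):

```python
from memory_strategies.utils import count_tokens, measure_time

@measure_time
def token_report(text: str) -> int:
    # Timed wrapper around the tokenizer; prints a [TIMING] line when called
    return count_tokens(text)

n = token_report("The quick brown fox jumps over the lazy dog.")
print(f"{n} tokens")
```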
/memory_strategies/sliding_window_memory.py:
--------------------------------------------------------------------------------
"""
Sliding Window Memory Strategy

This strategy maintains only the most recent N conversation turns using a fixed-size
window. It prevents unbounded context growth but may lose important historical information.
"""

from collections import deque
from typing import List, Dict, Any
from .base_memory import BaseMemoryStrategy
from .utils import count_tokens


class SlidingWindowMemory(BaseMemoryStrategy):
    """
    Sliding window memory strategy that keeps only recent N conversation turns.

    Advantages:
    - Controlled memory usage
    - Predictable token consumption
    - Scalable for long conversations

    Disadvantages:
    - Loses old information
    - May forget important early context
    - Fixed window size may not suit all scenarios
    """

    def __init__(self, window_size: int = 4):
        """
        Initialize memory with fixed-size deque.

        Args:
            window_size: Number of conversation turns to retain in memory.
                A single turn includes one user message and one AI response.
        """
        self.window_size = window_size
        # Deque with maxlen automatically discards oldest items when full
        self.history = deque(maxlen=window_size)
        self.total_content_tokens = 0  # Track cumulative content token usage
        self.total_prompt_tokens = 0  # Track cumulative prompt tokens sent to LLM

    def add_message(self, user_input: str, ai_response: str) -> None:
        """
        Add new conversation turn to history.

        If deque is full, the oldest turn is automatically removed.

        Args:
            user_input: User's message
            ai_response: AI's response
        """
        # Each turn (user input + AI response) is stored as a single element
        # This makes it easy to manage window size by turns
        turn_data = [
            {"role": "user", "content": user_input},
            {"role": "assistant", "content": ai_response}
        ]
        self.history.append(turn_data)

        # Update content token count (just the message content)
        self.total_content_tokens += count_tokens(user_input + ai_response)

    def get_context(self, query: str) -> str:
        """
        Retrieve conversation history within current window.

        The 'query' parameter is ignored in this strategy.

        Args:
            query: Current user query (ignored in this strategy)

        Returns:
            Recent conversation history as formatted string
        """
        if not self.history:
            return "No conversation history yet."

        # Create temporary list to hold formatted messages
        context_list = []

        # Iterate through each turn stored in the deque
        for turn in self.history:
            # Iterate through user and assistant messages in the turn
            for message in turn:
                # Format message and add to our list
                context_list.append(f"{message['role'].capitalize()}: {message['content']}")

        # Join all formatted messages into a single string
        return "\n".join(context_list)

    def clear(self) -> None:
        """Reset conversation history by clearing the deque."""
        self.history.clear()
        self.total_content_tokens = 0
        self.total_prompt_tokens = 0
        print("Sliding window memory cleared.")

    def get_memory_stats(self) -> Dict[str, Any]:
        """
        Get statistics about current memory usage.

        Returns:
            Dictionary containing memory statistics
        """
        current_turns = len(self.history)
        total_messages = sum(len(turn) for turn in self.history)

        return {
            "strategy_type": "SlidingWindowMemory",
            "window_size": self.window_size,
            "current_turns": current_turns,
            "total_messages": total_messages,
            "total_content_tokens": self.total_content_tokens,
            "total_prompt_tokens": self.total_prompt_tokens,
            "memory_size": f"{current_turns}/{self.window_size} turns",
            "advantages": ["Controlled memory", "Predictable tokens", "Scalable"],
            "disadvantages": ["Loses old info", "Fixed window size"]
        }
--------------------------------------------------------------------------------
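The deque's `maxlen` does the eviction silently, which is easy to verify offline. With a two-turn window, the first turn disappears as soon as a third one arrives (sample messages invented for the demo):

```python
from memory_strategies import SlidingWindowMemory

memory = SlidingWindowMemory(window_size=2)
memory.add_message("My name is Alex.", "Nice to meet you, Alex!")
memory.add_message("I live in Berlin.", "Berlin is a great city.")
memory.add_message("I work on ML.", "Interesting field!")

context = memory.get_context(query="")
print("Alex" in context)                          # False -- the oldest turn was evicted
print(memory.get_memory_stats()["memory_size"])   # "2/2 turns"
```

Note that `total_content_tokens` stays cumulative: it keeps counting evicted turns, by design, so it reflects total throughput rather than current window size.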
/memory_strategies/ai_agent.py:
--------------------------------------------------------------------------------
"""
AI Agent Class

This module contains the core AI Agent that coordinates conversation flow
and works with different memory strategies using the strategy pattern.
"""

import time
from typing import Optional
from openai import OpenAI

from .base_memory import BaseMemoryStrategy
from .utils import generate_text, count_tokens, get_openai_client


class AIAgent:
    """
    Main AI Agent class designed to work with any memory strategy.

    Uses the strategy pattern to allow switching between different
    memory management approaches at runtime.
    """

    def __init__(
        self,
        memory_strategy: BaseMemoryStrategy,
        system_prompt: str = "You are a helpful AI assistant.",
        client: Optional[OpenAI] = None
    ):
        """
        Initialize the AI agent.

        Args:
            memory_strategy: Instance of a class inheriting from BaseMemoryStrategy
            system_prompt: Initial instructions for the LLM defining its personality
            client: Optional OpenAI client instance
        """
        self.memory = memory_strategy
        self.system_prompt = system_prompt
        self.client = client or get_openai_client()
        print(f"Agent initialized with {type(memory_strategy).__name__}.")

    def chat(self, user_input: str, verbose: bool = True) -> dict:
        """
        Process a single conversation turn.

        Args:
            user_input: The user's latest message
            verbose: Whether to print detailed debug information

        Returns:
            Dictionary containing response and performance metrics
        """
        if verbose:
            print(f"\n{'='*25} NEW INTERACTION {'='*25}")
            print(f"User > {user_input}")

        # Step 1: Retrieve context from the agent's memory strategy
        start_time = time.time()
        context = self.memory.get_context(query=user_input)
        retrieval_time = time.time() - start_time

        # Step 2: Build complete prompt for the LLM
        full_user_prompt = f"### MEMORY CONTEXT\n{context}\n\n### CURRENT REQUEST\n{user_input}"

        # Step 3: Calculate token usage for debugging
        prompt_tokens = count_tokens(self.system_prompt + full_user_prompt)

        if verbose:
            print("\n--- Agent Debug Info ---")
            print(f"Memory Retrieval Time: {retrieval_time:.4f} seconds")
            print(f"Estimated Prompt Tokens: {prompt_tokens}")
            print(f"\n[Context Retrieved]:\n{context}\n")

        # Step 4: Call LLM to get response
        start_time = time.time()
        ai_response = generate_text(self.system_prompt, full_user_prompt, self.client)
        generation_time = time.time() - start_time

        # Step 5: Update memory with the latest interaction
        self.memory.add_message(user_input, ai_response)

        # Step 6: Update prompt token tracking if memory strategy supports it
        if hasattr(self.memory, 'total_prompt_tokens'):
            self.memory.total_prompt_tokens += prompt_tokens

        # Step 7: Display AI response and performance metrics
        if verbose:
            print(f"\nAgent > {ai_response}")
            print(f"(LLM Generation Time: {generation_time:.4f} seconds)")
            print(f"{'='*70}")

        return {
            "user_input": user_input,
            "ai_response": ai_response,
            "retrieval_time": retrieval_time,
            "generation_time": generation_time,
            "prompt_tokens": prompt_tokens,
            "context": context
        }

    def get_memory_stats(self) -> dict:
        """
        Get current memory statistics.

        Returns:
            Dictionary containing memory usage statistics
        """
        return self.memory.get_memory_stats()

    def clear_memory(self) -> None:
        """
        Clear the agent's memory.
        """
        self.memory.clear()
        print("Agent memory cleared.")

    def set_system_prompt(self, new_prompt: str) -> None:
        """
        Update the system prompt.

        Args:
            new_prompt: New system prompt to use
        """
        self.system_prompt = new_prompt
        print(f"System prompt updated to: {new_prompt[:50]}...")
--------------------------------------------------------------------------------
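A minimal interactive loop around `AIAgent`. This sketch assumes `OPENAI_API_KEY` is set in the environment; the strategy choice here is arbitrary and any of the nine would work:

```python
from memory_strategies import AIAgent, SlidingWindowMemory

agent = AIAgent(memory_strategy=SlidingWindowMemory(window_size=4))

while True:
    user_input = input("You > ").strip()
    if user_input.lower() in {"quit", "exit"}:
        break
    # verbose=False suppresses the built-in debug printout
    result = agent.chat(user_input, verbose=False)
    print(f"Agent > {result['ai_response']}")

print(agent.get_memory_stats())
```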
/memory_strategies/os_memory.py:
--------------------------------------------------------------------------------
"""
Operating System-like Memory Management Strategy

This strategy simulates how computer operating systems manage memory with
RAM (active memory) and disk (passive memory), implementing paging mechanisms
for intelligent memory management.
"""

from collections import deque
from typing import Dict, Any, Optional, Tuple
from .base_memory import BaseMemoryStrategy


class OSMemory(BaseMemoryStrategy):
    """
    OS-like memory management strategy simulating RAM and disk storage.

    Advantages:
    - Scalable memory management
    - Intelligent paging system
    - Efficient active context
    - Nearly unlimited memory capacity

    Disadvantages:
    - Complex paging logic
    - May miss relevant passive information
    - Requires tuning of RAM size
    - Page fault overhead
    """

    def __init__(self, ram_size: int = 2):
        """
        Initialize OS-like memory system.

        Args:
            ram_size: Maximum number of conversation turns to retain in active memory (RAM)
        """
        self.ram_size = ram_size

        # 'RAM' is a deque that holds recent turns
        self.active_memory: deque = deque()

        # 'Hard disk' is a dictionary for storing paged-out turns
        self.passive_memory: Dict[int, str] = {}

        # Counter to give each turn a unique ID
        self.turn_count = 0

    def add_message(self, user_input: str, ai_response: str) -> None:
        """
        Add turn to active memory, page out oldest turn to passive memory if RAM is full.

        Args:
            user_input: User's message
            ai_response: AI's response
        """
        turn_id = self.turn_count
        turn_data = f"User: {user_input}\nAI: {ai_response}"

        # Check if active memory (RAM) is full
        if len(self.active_memory) >= self.ram_size:
            # If so, remove least recently used (oldest) item from active memory
            lru_turn_id, lru_turn_data = self.active_memory.popleft()

            # Move it to passive memory (hard disk)
            self.passive_memory[lru_turn_id] = lru_turn_data
            print(f"--- [OS Memory: Paging out Turn {lru_turn_id} to passive storage.] ---")

        # Add new turn to active memory
        self.active_memory.append((turn_id, turn_data))
        self.turn_count += 1

    def get_context(self, query: str) -> str:
        """
        Provide RAM context and simulate 'page faults' by pulling from passive memory if needed.

        Args:
            query: Current user query

        Returns:
            Context from active memory and any paged-in passive memory
        """
        # Base context is always what's in active memory
        active_context = "\n".join([data for _, data in self.active_memory])

        # Simulate page fault: check if any words in query match content in passive memory
        paged_in_context = ""
        query_words = [word.lower() for word in query.split() if len(word) > 3]

        for turn_id, data in self.passive_memory.items():
            # Check for keyword matches in passive memory
            if any(word in data.lower() for word in query_words):
                paged_in_context += f"\n(Paged in from Turn {turn_id}): {data}"
                print(f"--- [OS Memory: Page fault! Paging in Turn {turn_id} from passive storage.] ---")

        # Combine active context with any paged-in context
        if paged_in_context:
            return f"### Active Memory (RAM):\n{active_context}\n\n### Paged-In from Passive Memory (Disk):\n{paged_in_context}"
        else:
            return f"### Active Memory (RAM):\n{active_context}" if active_context else "No information in memory yet."

    def clear(self) -> None:
        """Clear both active and passive memory storage."""
        self.active_memory.clear()
        self.passive_memory = {}
        self.turn_count = 0
        print("OS-like memory cleared.")

    def get_memory_stats(self) -> Dict[str, Any]:
        """
        Get statistics about current memory usage.

        Returns:
            Dictionary containing memory statistics
        """
        active_turns = len(self.active_memory)
        passive_turns = len(self.passive_memory)
        total_turns = self.turn_count

        return {
            "strategy_type": "OSMemory",
            "ram_size": self.ram_size,
            "active_turns": active_turns,
            "passive_turns": passive_turns,
            "total_turns": total_turns,
            "memory_size": f"{active_turns} in RAM, {passive_turns} on disk",
            "advantages": ["Scalable management", "Intelligent paging", "Unlimited capacity"],
            "disadvantages": ["Complex paging", "May miss passive info", "Page fault overhead"]
        }
--------------------------------------------------------------------------------
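The paging behaviour needs no LLM at all, since `OSMemory` only moves strings between the deque and the dict. With `ram_size=1`, the first turn is paged out as soon as a second arrives, and a matching keyword (longer than 3 characters) in a later query pages it back in:

```python
from memory_strategies import OSMemory

memory = OSMemory(ram_size=1)
memory.add_message("My favourite language is Haskell.", "Noted!")
memory.add_message("What's the weather like?", "I don't have live data.")

# Turn 0 now lives in passive memory; 'Haskell' triggers a simulated page fault
print(memory.get_context("Tell me about Haskell again"))
print(memory.get_memory_stats()["memory_size"])  # "1 in RAM, 1 on disk"
```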
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
--------------------------------------------------------------------------------
/memory_strategies/hierarchical_memory.py:
--------------------------------------------------------------------------------
"""
Hierarchical Memory Strategy

This strategy combines multiple memory types into a layered system that mimics
human memory patterns with working memory (short-term) and long-term memory layers.
"""

from typing import List, Dict, Any, Optional
from openai import OpenAI
from .base_memory import BaseMemoryStrategy
from .sliding_window_memory import SlidingWindowMemory
from .retrieval_memory import RetrievalMemory
from .utils import get_openai_client


class HierarchicalMemory(BaseMemoryStrategy):
    """
    Hierarchical memory strategy combining working memory and long-term memory.

    Advantages:
    - Multi-level information processing
    - Intelligent information promotion
    - Combines strengths of multiple strategies
    - Resembles human cognitive patterns

    Disadvantages:
    - Complex implementation
    - Multiple memory systems to manage
    - Promotion logic may need tuning
    - Higher computational overhead
    """

    def __init__(
        self,
        window_size: int = 2,
        k: int = 2,
        embedding_dim: int = 1536,
        client: Optional[OpenAI] = None
    ):
        """
        Initialize hierarchical memory system.

        Args:
            window_size: Size of short-term working memory (in turns)
            k: Number of documents to retrieve from long-term memory
            embedding_dim: Embedding vector dimension for long-term memory
            client: Optional OpenAI client instance
        """
        print("Initializing Hierarchical Memory...")
        self.client = client or get_openai_client()

        # Level 1: Fast, short-term working memory using sliding window
        self.working_memory = SlidingWindowMemory(window_size=window_size)

        # Level 2: Slower, persistent long-term memory using retrieval system
        self.long_term_memory = RetrievalMemory(k=k, embedding_dim=embedding_dim, client=self.client)

        # Simple heuristic: keywords that trigger promotion from working to long-term memory
        self.promotion_keywords = ["remember", "rule", "preference", "always", "never", "allergic", "important"]

    def add_message(self, user_input: str, ai_response: str) -> None:
        """
        Add messages to working memory and conditionally promote to long-term memory.

        Args:
            user_input: User's message
            ai_response: AI's response
        """
        # All interactions are added to fast, short-term working memory
        self.working_memory.add_message(user_input, ai_response)

        # Promotion logic: check if user input contains keywords indicating
        # information is important and should be stored long-term
        if any(keyword in user_input.lower() for keyword in self.promotion_keywords):
            print(f"--- [Hierarchical Memory: Promoting message to long-term storage.] ---")
            # If keywords found, also add interaction to long-term retrieval memory
            self.long_term_memory.add_message(user_input, ai_response)

    def get_context(self, query: str) -> str:
        """
        Construct rich context by combining relevant information from both memory layers.

        Args:
            query: Current user query

        Returns:
            Combined context from long-term and short-term memory
        """
        # Get recent context from working memory
        working_context = self.working_memory.get_context(query)

        # Retrieve relevant content from long-term memory
        long_term_context = self.long_term_memory.get_context(query)

        # If no relevant content in long-term memory, use only working memory
        if ("No information in memory yet" in long_term_context or
                "Could not find any relevant information" in long_term_context):
            return f"### Recent Context:\n{working_context}"
        else:
            # Otherwise, combine both memory layers
            return f"### Long-Term Context:\n{long_term_context}\n\n### Recent Context:\n{working_context}"

    def clear(self) -> None:
        """Reset both working memory and long-term memory."""
        self.working_memory.clear()
        self.long_term_memory.clear()
        print("Hierarchical memory cleared.")

    def get_memory_stats(self) -> Dict[str, Any]:
        """
        Get statistics about current memory usage from both layers.

        Returns:
            Dictionary containing memory statistics
        """
        working_stats = self.working_memory.get_memory_stats()
        long_term_stats = self.long_term_memory.get_memory_stats()

        return {
            "strategy_type": "HierarchicalMemory",
            "promotion_keywords": self.promotion_keywords,
            "working_memory_stats": working_stats,
            "long_term_memory_stats": long_term_stats,
            "memory_size": f"Working: {working_stats['memory_size']}, Long-term: {long_term_stats['memory_size']}",
            "advantages": ["Multi-level processing", "Intelligent promotion", "Human-like patterns"],
            "disadvantages": ["Complex implementation", "Multiple systems", "Overhead"]
        }
--------------------------------------------------------------------------------
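Promotion is keyword-driven, so it can be exercised with a single flagged message. This sketch needs a valid `OPENAI_API_KEY`, because the long-term layer embeds everything it stores; the messages are invented:

```python
from memory_strategies import HierarchicalMemory

memory = HierarchicalMemory(window_size=2, k=2)

# No promotion keyword -> lands in working memory only
memory.add_message("Hi there!", "Hello!")

# 'remember' and 'allergic' both trigger promotion into the retrieval layer
memory.add_message("Please remember that I am allergic to peanuts.",
                   "Got it, I will remember that.")

print(memory.get_context("What food allergies do I have?"))
```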
/memory_strategies/memory_augmented_memory.py:
--------------------------------------------------------------------------------
"""
Memory-Augmented Memory Strategy

This strategy simulates memory-enhanced transformer behavior by maintaining
a short-term sliding window of recent conversations and a separate list of
"memory tokens" - important facts extracted from conversations.
"""

from typing import List, Dict, Any, Optional
from openai import OpenAI
from .base_memory import BaseMemoryStrategy
from .sliding_window_memory import SlidingWindowMemory
from .utils import generate_text, get_openai_client


class MemoryAugmentedMemory(BaseMemoryStrategy):
    """
    Memory-augmented strategy combining sliding window with persistent memory tokens.

    Advantages:
    - Excellent long-term retention of key information
    - Suitable for evolving long-term conversations
    - Intelligent fact extraction mechanism
    - Strong foundation for personal assistants

    Disadvantages:
    - More complex implementation
    - Additional LLM calls increase cost
    - Depends on fact extraction quality
    - May increase response time
    """

    def __init__(self, window_size: int = 2, client: Optional[OpenAI] = None):
        """
        Initialize memory-augmented system.

        Args:
            window_size: Number of recent turns to retain in short-term memory
            client: Optional OpenAI client instance
        """
        self.client = client or get_openai_client()

        # Use SlidingWindowMemory instance to manage recent conversation history
        self.recent_memory = SlidingWindowMemory(window_size=window_size)

        # List to store special, persistent "sticky notes" or key facts
        self.memory_tokens: List[str] = []

    def add_message(self, user_input: str, ai_response: str) -> None:
        """
        Add latest turn to recent memory, then use LLM call to decide
        if new persistent memory tokens should be created from this interaction.

        Args:
            user_input: User's message
            ai_response: AI's response
        """
        # First, add new interaction to short-term sliding window memory
        self.recent_memory.add_message(user_input, ai_response)

        # Construct prompt for LLM to analyze conversation turn and
        # determine if it contains core facts worth remembering long-term
        fact_extraction_prompt = (
            f"Analyze the following conversation turn. Does it contain a core fact, preference, or decision that should be remembered long-term? "
            f"Examples include user preferences ('I hate flying'), key decisions ('The budget is $1000'), or important facts ('My user ID is 12345').\n\n"
            f"Conversation Turn:\nUser: {user_input}\nAI: {ai_response}\n\n"
            f"If it contains such a fact, state the fact concisely in one sentence. Otherwise, respond with 'No important fact.'"
        )

        # Call LLM to perform fact extraction
        extracted_fact = generate_text(
            "You are a fact-extraction expert.",
            fact_extraction_prompt,
            self.client
        )

        # Check if LLM's response indicates an important fact was found
        if "no important fact" not in extracted_fact.lower():
            # If fact found, print debug message and add to memory tokens list
            print(f"--- [Memory Augmentation: New memory token created: '{extracted_fact}'] ---")
            self.memory_tokens.append(extracted_fact)

    def get_context(self, query: str) -> str:
        """
        Construct context by combining short-term recent conversation
        with list of all long-term, persistent memory tokens.

        Args:
            query: Current user query

        Returns:
            Combined context from memory tokens and recent conversation
        """
        # Get context from short-term sliding window
        recent_context = self.recent_memory.get_context(query)

        # Format memory tokens list as readable string
        if self.memory_tokens:
            memory_token_context = "\n".join([f"- {token}" for token in self.memory_tokens])
            return f"### Key Memory Tokens (Long-Term Facts):\n{memory_token_context}\n\n### Recent Conversation:\n{recent_context}"
        else:
            return f"### Recent Conversation:\n{recent_context}"

    def clear(self) -> None:
        """Reset both recent memory and memory tokens."""
        self.recent_memory.clear()
        self.memory_tokens = []
        print("Memory-augmented memory cleared.")

    def get_memory_stats(self) -> Dict[str, Any]:
        """
        Get statistics about current memory usage.

        Returns:
            Dictionary containing memory statistics
        """
        recent_stats = self.recent_memory.get_memory_stats()
        num_tokens = len(self.memory_tokens)

        return {
            "strategy_type": "MemoryAugmentedMemory",
            "memory_tokens": num_tokens,
            "recent_memory_stats": recent_stats,
            "memory_size": f"{num_tokens} memory tokens + recent window",
            "advantages": ["Long-term retention", "Intelligent extraction", "Personal assistant ready"],
            "disadvantages": ["Complex implementation", "Additional LLM calls", "Fact extraction dependent"]
        }
--------------------------------------------------------------------------------
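Every `add_message` here costs one extra LLM call for fact extraction, which is worth keeping in mind for chatty workloads. A short sketch (requires `OPENAI_API_KEY`; whether a token is created depends on the extractor's judgment):

```python
from memory_strategies import MemoryAugmentedMemory

memory = MemoryAugmentedMemory(window_size=2)
memory.add_message("My user ID is 12345 and I hate flying.",
                   "Understood, I have noted both details.")

# Any extracted fact now lives outside the sliding window, so it survives eviction
print(memory.memory_tokens)
print(memory.get_context("How should I plan my travel?"))
```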
/memory_strategies/retrieval_memory.py:
--------------------------------------------------------------------------------
"""
Retrieval-based Memory Strategy

This strategy implements the core concept of Retrieval-Augmented Generation (RAG).
It converts conversations into vector embeddings and uses similarity search to find
the most relevant historical interactions for any given query.
"""

import numpy as np
import faiss
from typing import List, Dict, Any, Optional
from openai import OpenAI
from .base_memory import BaseMemoryStrategy
from .utils import generate_embedding, get_openai_client


class RetrievalMemory(BaseMemoryStrategy):
    """
    Retrieval-based memory strategy using vector embeddings and similarity search.

    Advantages:
    - Semantic understanding of queries
    - Efficient retrieval of relevant information
    - Scalable to large conversation histories
    - Industry standard for RAG applications

    Disadvantages:
    - Complex implementation
    - Requires embedding model
    - Dependent on embedding quality
    - Additional computational overhead
    """

    def __init__(self, k: int = 2, embedding_dim: int = 1536, client: Optional[OpenAI] = None):
        """
        Initialize retrieval memory system.

        Args:
            k: Number of most relevant documents to retrieve for a given query
            embedding_dim: Dimension of embedding vectors (1536 for text-embedding-3-small)
            client: Optional OpenAI client instance
        """
        self.k = k
        self.embedding_dim = embedding_dim
        self.client = client or get_openai_client()

        # List to store original text content of each document
        self.documents: List[str] = []

        # Initialize FAISS index for similarity search
        # IndexFlatL2 uses L2 (Euclidean) distance for exhaustive search
        self.index = faiss.IndexFlatL2(self.embedding_dim)

    def add_message(self, user_input: str, ai_response: str) -> None:
        """
        Add new conversation turn to memory.

        Each part of the turn (user input and AI response) is embedded
        and indexed separately for fine-grained retrieval.

        Args:
            user_input: User's message
            ai_response: AI's response
        """
        # Store each part of the turn as separate documents for precise matching
        docs_to_add = [
            f"User said: {user_input}",
            f"AI responded: {ai_response}"
        ]

        for doc in docs_to_add:
            # Generate numerical vector representation of the document
            embedding = generate_embedding(doc, self.client)

            # Only proceed if embedding was successfully created
            if embedding:
                # Store original text - index will correspond to vector index in FAISS
                self.documents.append(doc)

                # FAISS requires input vectors to be float32 2D numpy arrays
                vector = np.array([embedding], dtype='float32')

                # Add vector to FAISS index, making it searchable
                self.index.add(vector)

    def get_context(self, query: str) -> str:
        """
        Find k most relevant documents from memory based on semantic similarity to query.

        Args:
            query: Current user query to find relevant context for

        Returns:
            Formatted string containing most relevant retrieved information
        """
        # If index has no vectors, there's nothing to search
        if self.index.ntotal == 0:
            return "No information in memory yet."

        # Convert user query to embedding vector
        query_embedding = generate_embedding(query, self.client)
        if not query_embedding:
            return "Could not process query for retrieval."

        # Convert query embedding to format required by FAISS
        query_vector = np.array([query_embedding], dtype='float32')

        # Perform search - returns distances and indices of k nearest neighbors
        distances, indices = self.index.search(query_vector, self.k)

        # Use returned indices to retrieve original text documents
        # Check for i != -1 because FAISS may return -1 for invalid indices
        retrieved_docs = [
            self.documents[i] for i in indices[0]
            if i != -1 and i < len(self.documents)
        ]

        if not retrieved_docs:
            return "Could not find any relevant information in memory."

        # Format retrieved documents as string for use as context
        return "### Relevant Information Retrieved from Memory:\n" + "\n---\n".join(retrieved_docs)

    def clear(self) -> None:
        """Reset both document storage and FAISS index."""
        self.documents = []
        self.index = faiss.IndexFlatL2(self.embedding_dim)
        print("Retrieval memory cleared.")

    def get_memory_stats(self) -> Dict[str, Any]:
        """
        Get statistics about current memory usage.

        Returns:
            Dictionary containing memory statistics
        """
        num_documents = len(self.documents)
        num_vectors = self.index.ntotal

        return {
            "strategy_type": "RetrievalMemory",
            "k": self.k,
            "embedding_dim": self.embedding_dim,
            "num_documents": num_documents,
            "num_vectors": num_vectors,
            "memory_size": f"{num_documents} documents, {num_vectors} vectors",
            "advantages": ["Semantic search", "Scalable", "Relevant retrieval"],
            "disadvantages": ["Complex implementation", "Embedding dependent"]
        }
--------------------------------------------------------------------------------
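The FAISS pattern used above can be demonstrated without the embedding API by substituting synthetic vectors; only the add-then-search geometry matters. A self-contained sketch with random 8-dimensional vectors standing in for real embeddings:

```python
import numpy as np
import faiss

dim = 8
index = faiss.IndexFlatL2(dim)  # exhaustive L2 search, as in RetrievalMemory
documents = ["User said: I love coffee",
             "AI responded: Espresso it is",
             "User said: I work on ML"]

rng = np.random.default_rng(0)
vectors = rng.random((len(documents), dim), dtype=np.float32)
index.add(vectors)  # one vector per document, same list order

# A query vector very close to document 0 should retrieve it first
query = (vectors[0] + 0.01).reshape(1, -1)
distances, indices = index.search(query, 2)
print([documents[i] for i in indices[0] if i != -1])
```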
/memory_strategies/summarization_memory.py:
--------------------------------------------------------------------------------
"""
Summarization Memory Strategy

This strategy manages long conversations by periodically summarizing conversation history.
It maintains a buffer of recent messages and triggers summarization when the buffer
reaches a threshold, using LLM to compress historical information intelligently.
"""

from typing import List, Dict, Any, Optional
from openai import OpenAI
from .base_memory import BaseMemoryStrategy
from .utils import generate_text, get_openai_client


class SummarizationMemory(BaseMemoryStrategy):
    """
    Summarization memory strategy that compresses conversation history using LLM.

    Advantages:
    - Manages long conversations efficiently
    - Retains key information through intelligent compression
    - Scalable token usage
    - Maintains conversation flow

    Disadvantages:
    - May lose details during summarization
    - Depends on LLM summarization quality
    - Additional LLM calls increase cost
    - Information decay over time
    """

    def __init__(self, summary_threshold: int = 4, client: Optional[OpenAI] = None):
        """
        Initialize summarization memory.

        Args:
            summary_threshold: Number of messages to accumulate before triggering summary
            client: Optional OpenAI client instance
        """
        self.summary_threshold = summary_threshold
        self.client = client or get_openai_client()

        # Store continuously updated summary of conversation so far
        self.running_summary = ""

        # Temporary list to hold recent messages before summarization
        self.buffer: List[Dict[str, str]] = []

    def add_message(self, user_input: str, ai_response: str) -> None:
        """
        Add new user-AI interaction to buffer.

        If buffer size reaches threshold, triggers memory consolidation process.

        Args:
            user_input: User's message
            ai_response: AI's response
        """
        # Append latest user and AI messages to temporary buffer
        self.buffer.append({"role": "user", "content": user_input})
        self.buffer.append({"role": "assistant", "content": ai_response})

        # Check if buffer has reached its capacity
        if len(self.buffer) >= self.summary_threshold:
            # If so, call method to summarize buffer contents
            self._consolidate_memory()

    def _consolidate_memory(self) -> None:
        """
        Use LLM to summarize buffer contents and merge with existing summary.

        This is the core innovation of the summarization strategy.
        """
        print("\n--- [Memory Consolidation Triggered] ---")

        # Convert buffered message list to single formatted string
        buffer_text = "\n".join([
            f"{msg['role'].capitalize()}: {msg['content']}"
            for msg in self.buffer
        ])

        # Construct specific prompt for LLM to perform summarization task
        summarization_prompt = (
            f"You are a summarization expert. Your task is to create a concise summary of a conversation. "
            f"Combine the 'Previous Summary' with the 'New Conversation' into a single, updated summary. "
            f"Capture all key facts, names, decisions, and important details.\n\n"
            f"### Previous Summary:\n{self.running_summary}\n\n"
            f"### New Conversation:\n{buffer_text}\n\n"
            f"### Updated Summary:"
        )

        # Call LLM with specific system prompt to get new summary
        new_summary = generate_text(
            "You are an expert summarization engine.",
            summarization_prompt,
            self.client
        )

        # Replace old summary with newly generated merged summary
        self.running_summary = new_summary

        # Clear buffer since its contents are now merged into summary
        self.buffer = []

        print(f"--- [New Summary Generated] ---")
        print(f"Summary: {self.running_summary[:100]}...")

    def get_context(self, query: str) -> str:
        """
        Construct context to send to LLM by combining long-term summary
        with short-term buffer of recent messages.

        Args:
            query: Current user query (ignored in this strategy)

        Returns:
            Combined context from summary and recent messages
        """
        # Format current messages in buffer
        buffer_text = "\n".join([
            f"{msg['role'].capitalize()}: {msg['content']}"
            for msg in self.buffer
        ])

        # Return combination of historical summary and recent unsummarized messages
        if self.running_summary:
            return f"### Summary of Past Conversation:\n{self.running_summary}\n\n### Recent Messages:\n{buffer_text}"
        else:
            return f"### Recent Messages:\n{buffer_text}" if buffer_text else "No conversation history yet."

    def clear(self) -> None:
        """Reset both summary and buffer."""
        self.running_summary = ""
        self.buffer = []
        print("Summarization memory cleared.")

    def get_memory_stats(self) -> Dict[str, Any]:
        """
        Get statistics about current memory usage.

        Returns:
            Dictionary containing memory statistics
        """
        buffer_messages = len(self.buffer)
        has_summary = bool(self.running_summary)

        return {
            "strategy_type": "SummarizationMemory",
            "summary_threshold": self.summary_threshold,
            "buffer_messages": buffer_messages,
            "has_summary": has_summary,
            "summary_length": len(self.running_summary) if has_summary else 0,
            "memory_size": f"Summary + {buffer_messages} buffered messages",
            "advantages": ["Efficient compression", "Retains key info", "Scalable"],
            "disadvantages": ["May lose details", "LLM dependent", "Additional cost"]
        }
--------------------------------------------------------------------------------
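With the default threshold of 4 messages, consolidation fires after the second turn, since each turn adds two buffer entries. A short sketch (the summarization call itself needs `OPENAI_API_KEY`; the messages are invented):

```python
from memory_strategies import SummarizationMemory

memory = SummarizationMemory(summary_threshold=4)
memory.add_message("I'm planning a trip to Japan.", "Sounds exciting!")
# This second turn fills the 4-message buffer and triggers consolidation
memory.add_message("The budget is $3000.", "Noted, $3000 total.")

stats = memory.get_memory_stats()
print(stats["has_summary"], stats["buffer_messages"])  # True 0 -- buffer was flushed
print(memory.get_context(""))
```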
/README.md:
--------------------------------------------------------------------------------
# AI Agent Memory Design & Optimization Playground

[![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)](https://python.org)
[![Streamlit](https://img.shields.io/badge/Streamlit-1.28+-red.svg)](https://streamlit.io)
[![FastAPI](https://img.shields.io/badge/FastAPI-0.104+-green.svg)](https://fastapi.tiangolo.com)
[![License](https://img.shields.io/badge/License-Apache%202.0-yellow.svg)](LICENSE)

> **Interactive playground for testing and comparing 9 different AI agent memory optimization strategies**

![AI Agent Memory Playground](sct.png)

## Overview

This project implements **9 different memory optimization techniques** for AI agents, providing a comprehensive solution for managing conversation history and context in production AI systems. Each strategy is implemented as a modular, plug-and-play class with a unified interface.

### Why Memory Optimization Matters

- **Token Cost Reduction**: Prevent runaway growth in LLM API costs (see the sketch below)
- **Context Preservation**: Maintain relevant information across conversations
- **Scalability**: Handle long conversations efficiently
- **Performance**: Optimize response times and memory usage
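The token-cost point is easy to make concrete: with full-history memory, the prompt for turn *n* carries all previous turns, so cumulative prompt tokens grow roughly quadratically with conversation length. A back-of-the-envelope sketch (the flat 50 tokens per turn is an assumed average, purely illustrative):

```python
tokens_per_turn = 50  # assumed average turn length, for illustration only

def cumulative_prompt_tokens(turns: int) -> int:
    # Turn n resends the n-1 previous turns plus the new message
    return sum(n * tokens_per_turn for n in range(1, turns + 1))

for turns in (10, 100, 1000):
    print(turns, cumulative_prompt_tokens(turns))
# 10 -> 2,750 tokens; 100 -> 252,500; 1000 -> 25,025,000
```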
## Memory Strategies Implemented

### Basic Strategies
1. **Sequential Memory** - Complete conversation history storage
2. **Sliding Window Memory** - Fixed-size recent conversation window
3. **Summarization Memory** - LLM-based conversation compression

### Advanced Strategies
4. **Retrieval Memory (RAG)** - Vector similarity search for semantic retrieval
5. **Memory-Augmented Memory** - Persistent memory tokens with sliding window
6. **Hierarchical Memory** - Multi-layered working + long-term memory

### Complex Strategies
7. **Graph Memory** - Knowledge graph with entity relationships
8. **Compression Memory** - Intelligent compression with importance scoring
9. **OS-like Memory** - RAM/disk simulation with paging mechanisms
## Features

- **Modular Architecture** - Strategy pattern for easy swapping
- **Interactive Playground** - Streamlit web interface for testing
- **Performance Analytics** - Token usage and response time tracking
- **Batch Comparison** - Test multiple strategies simultaneously
- **Production Ready** - FastAPI endpoints for deployment
- **Real-time Metrics** - Memory statistics and performance monitoring
## Installation

### Prerequisites
- Python 3.10+
- OpenAI API Key

### Setup

1. **Clone the repository**
```bash
git clone https://github.com/AIAnytime/Agent-Memory-Playground.git
cd Agent-Memory-Playground
```

2. **Install dependencies**
```bash
pip install -r requirements.txt
```

3. **Configure environment**
```bash
# Create .env file
echo "OPENAI_API_KEY=your_openai_api_key_here" > .env
```
## Quick Start

### 1. Interactive Playground (Streamlit)
```bash
streamlit run streamlit_playground.py
```
- Open http://localhost:8501 in your browser
- Enter your OpenAI API key in the sidebar
- Select a memory strategy and start testing!

### 2. API Server (FastAPI)
```bash
uvicorn api:app --reload
```
- API documentation: http://localhost:8000/docs
- Create sessions, chat, and monitor performance via REST API

### 3. Command Line Example
```bash
python example_usage.py
```
- Interactive CLI for testing all memory strategies
- Detailed memory statistics and performance metrics
## Usage Examples

### Basic Usage
```python
from memory_strategies import SequentialMemory, AIAgent

# Initialize memory strategy
memory = SequentialMemory()
agent = AIAgent(memory_strategy=memory)

# Chat with the agent
response = agent.chat("Hello! My name is Alex.")
print(response["ai_response"])

# Memory automatically preserved for next interaction
response = agent.chat("What's my name?")
print(response["ai_response"])  # Will remember "Alex"
```

### Advanced RAG Implementation
```python
from memory_strategies import RetrievalMemory, AIAgent

# Initialize RAG-based memory
memory = RetrievalMemory(k=3)  # Retrieve top 3 most similar stored messages
agent = AIAgent(memory_strategy=memory)

# Build conversation history
agent.chat("I'm a software engineer working on ML projects")
agent.chat("I prefer Python and love coffee")
agent.chat("I'm building a recommendation system")

# Query with semantic similarity
response = agent.chat("What do you know about my work?")
# Will retrieve relevant context about ML, Python, and recommendation systems
```
140 | 141 | Returns: 142 | Dictionary containing memory statistics 143 | """ 144 | buffer_messages = len(self.buffer) 145 | has_summary = bool(self.running_summary) 146 | 147 | return { 148 | "strategy_type": "SummarizationMemory", 149 | "summary_threshold": self.summary_threshold, 150 | "buffer_messages": buffer_messages, 151 | "has_summary": has_summary, 152 | "summary_length": len(self.running_summary) if has_summary else 0, 153 | "memory_size": f"Summary + {buffer_messages} buffered messages", 154 | "advantages": ["Efficient compression", "Retains key info", "Scalable"], 155 | "disadvantages": ["May lose details", "LLM dependent", "Additional cost"] 156 | } 157 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI Agent Memory Design & Optimization Playground 2 | 3 | [![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)](https://python.org) 4 | [![Streamlit](https://img.shields.io/badge/Streamlit-1.28+-red.svg)](https://streamlit.io) 5 | [![FastAPI](https://img.shields.io/badge/FastAPI-0.104+-green.svg)](https://fastapi.tiangolo.com) 6 | [![License](https://img.shields.io/badge/License-Apache%202.0-yellow.svg)](LICENSE) 7 | 8 | > **Interactive playground for testing and comparing 9 different AI agent memory optimization strategies** 9 | 10 | ![AI Agent Memory Playground](sct.png) 11 | 12 | ## Overview 13 | 14 | This project implements **9 different memory optimization techniques** for AI agents, providing a comprehensive solution for managing conversation history and context in production AI systems. Each strategy is implemented as a modular, plug-and-play class with a unified interface. 15 | 16 | ### Why Memory Optimization Matters 17 | 18 | - **Token Cost Reduction**: Prevent exponential growth in LLM API costs 19 | - **Context Preservation**: Maintain relevant information across conversations 20 | - **Scalability**: Handle long conversations efficiently 21 | - **Performance**: Optimize response times and memory usage 22 | 23 | ## Memory Strategies Implemented 24 | 25 | ### Basic Strategies 26 | 1. **Sequential Memory** - Complete conversation history storage 27 | 2. **Sliding Window Memory** - Fixed-size recent conversation window 28 | 3. **Summarization Memory** - LLM-based conversation compression 29 | 30 | ### Advanced Strategies 31 | 4. **Retrieval Memory (RAG)** - Vector similarity search for semantic retrieval 32 | 5. **Memory-Augmented Memory** - Persistent memory tokens with sliding window 33 | 6. **Hierarchical Memory** - Multi-layered working + long-term memory 34 | 35 | ### Complex Strategies 36 | 7. **Graph Memory** - Knowledge graph with entity relationships 37 | 8. **Compression Memory** - Intelligent compression with importance scoring 38 | 9. **OS-like Memory** - RAM/disk simulation with paging mechanisms 39 | 40 | ## Features 41 | 42 | - **Modular Architecture** - Strategy pattern for easy swapping 43 | - **Interactive Playground** - Streamlit web interface for testing 44 | - **Performance Analytics** - Token usage and response time tracking 45 | - **Batch Comparison** - Test multiple strategies simultaneously 46 | - **Production Ready** - FastAPI endpoints for deployment 47 | - **Real-time Metrics** - Memory statistics and performance monitoring 48 | 49 | ## Installation 50 | 51 | ### Prerequisites 52 | - Python 3.10+ 53 | - OpenAI API Key 54 | 55 | ### Setup 56 | 57 | 1. 
**Clone the repository** 58 | ```bash 59 | git clone https://github.com/AIAnytime/Agent-Memory-Playground.git 60 | cd Agent-Memory-Playground 61 | ``` 62 | 63 | 2. **Install dependencies** 64 | ```bash 65 | pip install -r requirements.txt 66 | ``` 67 | 68 | 3. **Configure environment** 69 | ```bash 70 | # Create .env file 71 | echo "OPENAI_API_KEY=your_openai_api_key_here" > .env 72 | ``` 73 | 74 | ## Quick Start 75 | 76 | ### 1. Interactive Playground (Streamlit) 77 | ```bash 78 | streamlit run streamlit_playground.py 79 | ``` 80 | - Open http://localhost:8501 in your browser 81 | - Enter your OpenAI API key in the sidebar 82 | - Select a memory strategy and start testing! 83 | 84 | ### 2. API Server (FastAPI) 85 | ```bash 86 | uvicorn api:app --reload 87 | ``` 88 | - API documentation: http://localhost:8000/docs 89 | - Create sessions, chat, and monitor performance via REST API 90 | 91 | ### 3. Command Line Example 92 | ```bash 93 | python example_usage.py 94 | ``` 95 | - Interactive CLI for testing all memory strategies 96 | - Detailed memory statistics and performance metrics 97 | 98 | ## Usage Examples 99 | 100 | ### Basic Usage 101 | ```python 102 | from memory_strategies import SequentialMemory, AIAgent 103 | 104 | # Initialize memory strategy 105 | memory = SequentialMemory() 106 | agent = AIAgent(memory_strategy=memory) 107 | 108 | # Chat with the agent 109 | response = agent.chat("Hello! My name is Alex.") 110 | print(response["ai_response"]) 111 | 112 | # Memory automatically preserved for next interaction 113 | response = agent.chat("What's my name?") 114 | print(response["ai_response"]) # Will remember "Alex" 115 | ``` 116 | 117 | ### Advanced RAG Implementation 118 | ```python 119 | from memory_strategies import RetrievalMemory, AIAgent 120 | 121 | # Initialize RAG-based memory 122 | memory = RetrievalMemory(k=3) # Retrieve top 3 similar conversations 123 | agent = AIAgent(memory_strategy=memory) 124 | 125 | # Build conversation history 126 | agent.chat("I'm a software engineer working on ML projects") 127 | agent.chat("I prefer Python and love coffee") 128 | agent.chat("I'm building a recommendation system") 129 | 130 | # Query with semantic similarity 131 | response = agent.chat("What do you know about my work?") 132 | # Will retrieve relevant context about ML, Python, and recommendation systems 133 | ``` 134 | 135 | ### Production API Usage 136 | ```bash 137 | # Create a session with hierarchical memory 138 | curl -X POST "http://localhost:8000/sessions" \ 139 | -H "Content-Type: application/json" \ 140 | -d '{ 141 | "strategy_type": "hierarchical", 142 | "system_prompt": "You are a helpful AI assistant.", 143 | "api_key": "your_openai_key" 144 | }' 145 | 146 | # Chat with the session 147 | curl -X POST "http://localhost:8000/sessions/{session_id}/chat" \ 148 | -H "Content-Type: application/json" \ 149 | -d '{ 150 | "message": "Remember that I prefer concise responses", 151 | "api_key": "your_openai_key" 152 | }' 153 | ``` 154 | 155 | ## Performance Comparison 156 | 157 | | Strategy | Token Efficiency | Retrieval Speed | Memory Usage | Best For | 158 | |----------|------------------|-----------------|--------------|----------| 159 | | Sequential | ❌ Low | ⚡ Instant | 📈 High | Short conversations | 160 | | Sliding Window | ✅ High | ⚡ Instant | 📊 Constant | Real-time chat | 161 | | Retrieval (RAG) | ✅ High | 🔍 Fast | 📊 Medium | Production systems | 162 | | Hierarchical | ✅ Very High | 🔍 Fast | 📊 Medium | Complex applications | 163 | | Graph Memory | 🔍 Medium | 🐌 Slow | 📈 High | Knowledge systems |
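The table above is qualitative. A rough way to check the token-efficiency column yourself is to run the same short script through two strategies and compare the prompt tokens sent to the LLM each turn (a minimal sketch, assuming your `OPENAI_API_KEY` is configured as described in Setup):

```python
from memory_strategies import AIAgent, SequentialMemory, SlidingWindowMemory

# Same conversation, two strategies: sequential memory re-sends the full
# history every turn, while the sliding window caps context at the last N turns.
script = [
    "Hi! My name is Alex.",
    "I work on ML projects in Python.",
    "I prefer concise answers.",
    "What do you know about me?",
]

for strategy in (SequentialMemory(), SlidingWindowMemory(window_size=2)):
    agent = AIAgent(memory_strategy=strategy)
    tokens = [agent.chat(msg)["prompt_tokens"] for msg in script]
    print(type(strategy).__name__, "prompt tokens per turn:", tokens)
```

With sequential memory the per-turn prompt tokens grow monotonically; the sliding window plateaus once the window is full.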
164 | 165 | ## Architecture 166 | 167 | ### Strategy Pattern Design 168 | ``` 169 | AIAgent 170 | ├── BaseMemoryStrategy (Abstract) 171 | │ ├── add_message() 172 | │ ├── get_context() 173 | │ └── clear() 174 | ├── SequentialMemory 175 | ├── SlidingWindowMemory 176 | ├── RetrievalMemory 177 | └── ... (6 more strategies) 178 | ``` 179 | 180 | ### Key Components 181 | - **Memory Strategies**: Modular memory implementations 182 | - **AI Agent**: Core agent using strategy pattern 183 | - **Utilities**: Token counting, embeddings, LLM integration 184 | - **API Layer**: FastAPI endpoints for production use 185 | - **Playground**: Streamlit interface for testing 186 | 187 | ## Monitoring & Metrics 188 | 189 | Track essential performance metrics: 190 | 191 | ```python 192 | { 193 | "total_content_tokens": 1250, # Raw conversation data 194 | "total_prompt_tokens": 4800, # Actual LLM costs 195 | "average_retrieval_time": 0.15, # Memory access speed 196 | "memory_efficiency": 0.73, # Compression ratio 197 | "context_relevance_score": 0.89 # Quality of retrieved context 198 | } 199 | ``` 200 | 201 | ## Configuration 202 | 203 | ### Memory Strategy Parameters 204 | 205 | **Sliding Window Memory** 206 | ```python 207 | SlidingWindowMemory(window_size=4) # Keep last 4 conversation turns 208 | ``` 209 | 210 | **Retrieval Memory (RAG)** 211 | ```python 212 | RetrievalMemory(k=3) # Retrieve top 3 similar conversations 213 | ``` 214 | 215 | **Hierarchical Memory** 216 | ```python 217 | HierarchicalMemory( 218 | window_size=2, # Working memory size 219 | k=3 # Long-term retrieval count 220 | ) 221 | ``` 222 | 223 | ## Production Deployment 224 | 225 | ### Docker Deployment 226 | ```dockerfile 227 | FROM python:3.10-slim 228 | 229 | WORKDIR /app 230 | COPY requirements.txt . 231 | RUN pip install -r requirements.txt 232 | 233 | COPY . . 234 | EXPOSE 8000 235 | 236 | CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"] 237 | ``` 238 | 239 | ### Environment Variables 240 | ```bash 241 | OPENAI_API_KEY=your_openai_api_key 242 | OPENAI_MODEL=gpt-4o-mini 243 | EMBEDDING_MODEL=text-embedding-3-small 244 | ``` 245 | 246 | ## Testing 247 | 248 | Run the test suite: 249 | ```bash 250 | python -m pytest tests/ 251 | ``` 252 | 253 | Run performance benchmarks: 254 | ```bash 255 | python benchmark.py 256 | ``` 257 | 258 | ## Documentation 259 | 260 | - **[Technical Guide](AI_Agent_Memory_Documentation.md)** - Comprehensive implementation details 261 | - **[API Documentation](http://localhost:8000/docs)** - FastAPI interactive docs 262 | - **[Strategy Comparison](docs/strategy-comparison.md)** - Performance analysis 263 | - **[Production Guide](docs/production-guide.md)** - Deployment best practices 264 | 265 | ## Contributing 266 | 267 | We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details. 268 | 269 | 1. Fork the repository 270 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`) 271 | 3. Commit your changes (`git commit -m 'Add amazing feature'`) 272 | 4. Push to the branch (`git push origin feature/amazing-feature`) 273 | 5. Open a Pull Request 274 | 275 | ## License 276 | 277 | This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
278 | 279 | ## Acknowledgments 280 | 281 | - **OpenAI** for providing the GPT models and embeddings 282 | - **Streamlit** for the amazing web framework 283 | - **FastAPI** for the high-performance API framework 284 | - **FAISS** for efficient vector similarity search 285 | 286 | ## Support & Contact 287 | 288 | - **Website**: [aianytime.net](https://aianytime.net) 289 | - **Creator Portfolio**: [sonukumar.site](https://sonukumar.site) 290 | - **YouTube**: [@AIAnytime](https://www.youtube.com/@AIAnytime) 291 | - **Issues**: [GitHub Issues](https://github.com/AIAnytime/Agent-Memory-Playground/issues) 292 | 293 | --- 294 | 295 |
296 | **Built with ❤️ by AI Anytime**
297 | **Star this repo if you find it helpful!**
298 |
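The `GraphMemory` implementation that follows stores LLM-extracted entities as NetworkX nodes and `source->relationship->target` triples as directed edges. A minimal, LLM-free sketch of that data structure, with hand-written triples standing in for the extraction step:

```python
import networkx as nx

# Miniature of what GraphMemory builds automatically: entities become nodes,
# extracted "source->relationship->target" triples become directed edges.
g = nx.DiGraph()
g.add_node("Alex", type="entity", speaker="user")
g.add_node("Python", type="entity", speaker="user")
g.add_edge("Alex", "Python", relationship="prefers")

# Query-time traversal, mirroring what GraphMemory.get_context() does
# after matching query entities against the graph:
for neighbor in g.neighbors("Alex"):
    relationship = g.edges["Alex", neighbor].get("relationship", "related to")
    print(f"Alex → {relationship} → {neighbor}")  # Alex → prefers → Python
```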
299 | -------------------------------------------------------------------------------- /memory_strategies/graph_memory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Graph Memory Network Strategy 3 | 4 | This strategy treats conversation elements as nodes and their relationships as edges, 5 | enabling complex reasoning and relationship understanding. Particularly suited for 6 | expert systems and knowledge base applications. 7 | """ 8 | 9 | import networkx as nx 10 | from typing import List, Dict, Any, Optional, Set 11 | from openai import OpenAI 12 | from .base_memory import BaseMemoryStrategy 13 | from .utils import generate_text, get_openai_client 14 | 15 | 16 | class GraphMemory(BaseMemoryStrategy): 17 | """ 18 | Graph-based memory strategy using NetworkX for relationship modeling. 19 | 20 | Advantages: 21 | - Models complex relationships between information 22 | - Supports logical reasoning queries 23 | - Structured knowledge representation 24 | - Excellent for expert systems 25 | 26 | Disadvantages: 27 | - Complex implementation and maintenance 28 | - Requires relationship extraction 29 | - May be overkill for simple conversations 30 | - Computational overhead for large graphs 31 | """ 32 | 33 | def __init__(self, client: Optional[OpenAI] = None): 34 | """ 35 | Initialize graph memory system. 36 | 37 | Args: 38 | client: Optional OpenAI client instance 39 | """ 40 | self.client = client or get_openai_client() 41 | 42 | # Initialize directed graph to store conversation elements and relationships 43 | self.knowledge_graph = nx.DiGraph() 44 | 45 | # Counter for generating unique node IDs 46 | self.node_counter = 0 47 | 48 | # Store raw conversation history for fallback 49 | self.conversation_history: List[Dict[str, str]] = [] 50 | 51 | def add_message(self, user_input: str, ai_response: str) -> None: 52 | """ 53 | Add conversation turn to graph by extracting entities and relationships. 54 | 55 | Args: 56 | user_input: User's message 57 | ai_response: AI's response 58 | """ 59 | # Store raw conversation for fallback 60 | self.conversation_history.append({ 61 | "user": user_input, 62 | "assistant": ai_response, 63 | "turn_id": self.node_counter 64 | }) 65 | 66 | # Extract entities and relationships from the conversation turn 67 | self._extract_and_add_entities(user_input, "user", self.node_counter) 68 | self._extract_and_add_entities(ai_response, "assistant", self.node_counter) 69 | 70 | self.node_counter += 1 71 | 72 | def _extract_and_add_entities(self, text: str, speaker: str, turn_id: int) -> None: 73 | """ 74 | Extract entities and relationships from text and add to knowledge graph. 75 | 76 | Args: 77 | text: Text to extract entities from 78 | speaker: Who said the text (user/assistant) 79 | turn_id: Turn identifier 80 | """ 81 | # Use LLM to extract key entities and relationships 82 | extraction_prompt = ( 83 | f"Extract key entities (people, places, concepts, facts) and relationships from this text. " 84 | f"Format as: ENTITIES: entity1, entity2, entity3... 
RELATIONSHIPS: entity1->relationship->entity2, etc.\n\n" 85 | f"Text: {text}\n\n" 86 | f"If no clear entities or relationships, respond with 'ENTITIES: none RELATIONSHIPS: none'" 87 | ) 88 | 89 | extracted_info = generate_text( 90 | "You are an entity and relationship extraction expert.", 91 | extraction_prompt, 92 | self.client 93 | ) 94 | 95 | # Parse extracted information and add to graph 96 | self._parse_and_add_to_graph(extracted_info, speaker, turn_id, text) 97 | 98 | def _parse_and_add_to_graph(self, extracted_info: str, speaker: str, turn_id: int, original_text: str) -> None: 99 | """ 100 | Parse extracted entities and relationships and add them to the knowledge graph. 101 | 102 | Args: 103 | extracted_info: LLM-extracted entities and relationships 104 | speaker: Who said the text 105 | turn_id: Turn identifier 106 | original_text: Original text for context 107 | """ 108 | try: 109 | # Simple parsing of the extraction format 110 | if "ENTITIES:" in extracted_info and "RELATIONSHIPS:" in extracted_info: 111 | parts = extracted_info.split("RELATIONSHIPS:") 112 | entities_part = parts[0].replace("ENTITIES:", "").strip() 113 | relationships_part = parts[1].strip() if len(parts) > 1 else "" 114 | 115 | # Add entities as nodes 116 | if entities_part.lower() != "none": 117 | entities = [e.strip() for e in entities_part.split(",") if e.strip()] 118 | for entity in entities: 119 | if entity: 120 | # Add entity node with metadata 121 | self.knowledge_graph.add_node( 122 | entity, 123 | type="entity", 124 | speaker=speaker, 125 | turn_id=turn_id, 126 | context=original_text[:100] # First 100 chars for context 127 | ) 128 | 129 | # Add relationships as edges 130 | if relationships_part.lower() != "none": 131 | relationships = [r.strip() for r in relationships_part.split(",") if r.strip()] 132 | for rel in relationships: 133 | if "->" in rel: 134 | parts = rel.split("->") 135 | if len(parts) == 3: 136 | source, relation, target = [p.strip() for p in parts] 137 | if source and target and relation: 138 | # Add relationship edge 139 | self.knowledge_graph.add_edge( 140 | source, target, 141 | relationship=relation, 142 | turn_id=turn_id, 143 | speaker=speaker 144 | ) 145 | except Exception as e: 146 | print(f"Error parsing extracted info: {e}") 147 | 148 | def get_context(self, query: str) -> str: 149 | """ 150 | Retrieve relevant context by traversing the knowledge graph. 151 | 152 | Args: 153 | query: Current user query 154 | 155 | Returns: 156 | Relevant context from knowledge graph and conversation history 157 | """ 158 | if self.knowledge_graph.number_of_nodes() == 0: 159 | return "No information in memory yet." 160 | 161 | # Extract entities from the query 162 | query_extraction_prompt = ( 163 | f"Extract key entities (people, places, concepts) from this query. " 164 | f"List them separated by commas. 
If no clear entities, respond with 'none'.\n\n" 165 | f"Query: {query}" 166 | ) 167 | 168 | query_entities = generate_text( 169 | "You are an entity extraction expert.", 170 | query_extraction_prompt, 171 | self.client 172 | ) 173 | 174 | relevant_info = [] 175 | 176 | # Find relevant nodes and relationships 177 | if query_entities.lower() != "none": 178 | entities = [e.strip() for e in query_entities.split(",") if e.strip()] 179 | 180 | for entity in entities: 181 | # Find exact matches or similar entities in graph 182 | for node in self.knowledge_graph.nodes(): 183 | if entity.lower() in node.lower() or node.lower() in entity.lower(): 184 | # Get node information 185 | node_data = self.knowledge_graph.nodes[node] 186 | relevant_info.append(f"Entity: {node} (from {node_data.get('speaker', 'unknown')})") 187 | 188 | # Get relationships involving this node 189 | for neighbor in self.knowledge_graph.neighbors(node): 190 | edge_data = self.knowledge_graph.edges[node, neighbor] 191 | relationship = edge_data.get('relationship', 'related to') 192 | relevant_info.append(f" → {relationship} → {neighbor}") 193 | 194 | # Fallback to recent conversation if no graph matches 195 | if not relevant_info: 196 | recent_turns = self.conversation_history[-3:] # Last 3 turns 197 | for turn in recent_turns: 198 | relevant_info.append(f"Turn {turn['turn_id']}: User: {turn['user']}") 199 | relevant_info.append(f"Turn {turn['turn_id']}: Assistant: {turn['assistant']}") 200 | 201 | return "### Knowledge Graph Context:\n" + "\n".join(relevant_info) if relevant_info else "No relevant information found." 202 | 203 | def clear(self) -> None: 204 | """Reset the knowledge graph and conversation history.""" 205 | self.knowledge_graph.clear() 206 | self.conversation_history = [] 207 | self.node_counter = 0 208 | print("Graph memory cleared.") 209 | 210 | def get_memory_stats(self) -> Dict[str, Any]: 211 | """ 212 | Get statistics about the knowledge graph. 213 | 214 | Returns: 215 | Dictionary containing memory statistics 216 | """ 217 | num_nodes = self.knowledge_graph.number_of_nodes() 218 | num_edges = self.knowledge_graph.number_of_edges() 219 | num_turns = len(self.conversation_history) 220 | 221 | return { 222 | "strategy_type": "GraphMemory", 223 | "num_nodes": num_nodes, 224 | "num_edges": num_edges, 225 | "num_turns": num_turns, 226 | "memory_size": f"{num_nodes} nodes, {num_edges} edges, {num_turns} turns", 227 | "advantages": ["Relationship modeling", "Complex reasoning", "Structured knowledge"], 228 | "disadvantages": ["Complex implementation", "Extraction dependent", "Computational overhead"] 229 | } 230 | -------------------------------------------------------------------------------- /example_usage.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI Agent Memory Design & Optimization - Example Usage 3 | 4 | This file demonstrates how to use multiple memory optimization techniques 5 | in a plug-and-play manner. Each strategy can be easily swapped and tested. 6 | """ 7 | 8 | import os 9 | import time 10 | from memory_strategies import ( 11 | AIAgent, 12 | SequentialMemory, 13 | SlidingWindowMemory, 14 | SummarizationMemory, 15 | RetrievalMemory, 16 | MemoryAugmentedMemory, 17 | HierarchicalMemory, 18 | GraphMemory, 19 | CompressionMemory, 20 | OSMemory, 21 | STRATEGY_INFO 22 | ) 23 | 24 | 25 | def demo_strategy(strategy_class, strategy_name, test_conversations, **kwargs): 26 | """ 27 | Demonstrate a specific memory strategy with test conversations. 
28 | 29 | Args: 30 | strategy_class: Memory strategy class to test 31 | strategy_name: Name of the strategy for display 32 | test_conversations: List of user inputs to test 33 | **kwargs: Additional arguments for strategy initialization 34 | """ 35 | print(f"\n{'='*60}") 36 | print(f"TESTING: {strategy_name}") 37 | print(f"{'='*60}") 38 | 39 | # Display strategy information 40 | info = STRATEGY_INFO.get(strategy_class.__name__, {}) 41 | print(f"Complexity: {info.get('complexity', 'Unknown')}") 42 | print(f"Description: {info.get('description', 'No description')}") 43 | print(f"Best for: {info.get('best_for', 'General use')}") 44 | print() 45 | 46 | try: 47 | # Initialize strategy and agent 48 | memory_strategy = strategy_class(**kwargs) 49 | agent = AIAgent(memory_strategy, system_prompt="You are a helpful AI assistant with memory.") 50 | 51 | # Run test conversations 52 | for i, user_input in enumerate(test_conversations, 1): 53 | print(f"\n--- Conversation Turn {i} ---") 54 | result = agent.chat(user_input, verbose=True) 55 | 56 | # Add small delay for readability 57 | time.sleep(0.5) 58 | 59 | # Display memory statistics 60 | print(f"\nMemory Statistics:") 61 | stats = agent.get_memory_stats() 62 | for key, value in stats.items(): 63 | if key not in ['advantages', 'disadvantages']: 64 | print(f" {key}: {value}") 65 | 66 | print(f"\nAdvantages: {', '.join(stats.get('advantages', []))}") 67 | print(f"Disadvantages: {', '.join(stats.get('disadvantages', []))}") 68 | 69 | except Exception as e: 70 | print(f"Error testing {strategy_name}: {str(e)}") 71 | 72 | print(f"\n{'='*60}") 73 | 74 | 75 | def run_comprehensive_demo(): 76 | """ 77 | Run comprehensive demonstration of all memory strategies. 78 | """ 79 | print("AI Agent Memory Design & Optimization - Comprehensive Demo") 80 | print("=" * 60) 81 | 82 | # Test conversations that showcase different memory capabilities 83 | test_conversations = [ 84 | "Hi! My name is Alex and I'm a software engineer.", 85 | "I'm working on a machine learning project about natural language processing.", 86 | "My favorite programming language is Python, and I prefer coffee over tea.", 87 | "Can you remember what my name is and what I'm working on?", 88 | "What do you know about my preferences?" 89 | ] 90 | 91 | # Test each strategy 92 | strategies_to_test = [ 93 | (SequentialMemory, "Sequential Memory", {}), 94 | (SlidingWindowMemory, "Sliding Window Memory", {"window_size": 3}), 95 | (SummarizationMemory, "Summarization Memory", {"summary_threshold": 4}), 96 | (RetrievalMemory, "Retrieval Memory", {"k": 2}), 97 | (MemoryAugmentedMemory, "Memory-Augmented Memory", {"window_size": 2}), 98 | (HierarchicalMemory, "Hierarchical Memory", {"window_size": 2, "k": 2}), 99 | (GraphMemory, "Graph Memory", {}), 100 | (CompressionMemory, "Compression Memory", {"compression_ratio": 0.6}), 101 | (OSMemory, "OS-like Memory", {"ram_size": 2}) 102 | ] 103 | 104 | for strategy_class, strategy_name, kwargs in strategies_to_test: 105 | demo_strategy(strategy_class, strategy_name, test_conversations, **kwargs) 106 | 107 | # Ask user if they want to continue 108 | user_input = input("\n🤔 Continue to next strategy? (y/n/q to quit): ").lower() 109 | if user_input == 'q': 110 | break 111 | elif user_input == 'n': 112 | continue 113 | 114 | 115 | def interactive_strategy_tester(): 116 | """ 117 | Interactive mode for testing specific strategies. 
118 | """ 119 | print("\nInteractive Strategy Tester") 120 | print("=" * 40) 121 | 122 | # Display available strategies 123 | strategies = { 124 | "1": (SequentialMemory, "Sequential Memory", {}), 125 | "2": (SlidingWindowMemory, "Sliding Window Memory", {"window_size": 3}), 126 | "3": (SummarizationMemory, "Summarization Memory", {"summary_threshold": 4}), 127 | "4": (RetrievalMemory, "Retrieval Memory", {"k": 2}), 128 | "5": (MemoryAugmentedMemory, "Memory-Augmented Memory", {"window_size": 2}), 129 | "6": (HierarchicalMemory, "Hierarchical Memory", {"window_size": 2, "k": 2}), 130 | "7": (GraphMemory, "Graph Memory", {}), 131 | "8": (CompressionMemory, "Compression Memory", {"compression_ratio": 0.6}), 132 | "9": (OSMemory, "OS-like Memory", {"ram_size": 2}) 133 | } 134 | 135 | print("Available Memory Strategies:") 136 | for key, (_, name, _) in strategies.items(): 137 | print(f" {key}. {name}") 138 | 139 | while True: 140 | choice = input("\nSelect a strategy (1-9) or 'q' to quit: ").strip() 141 | 142 | if choice.lower() == 'q': 143 | break 144 | 145 | if choice in strategies: 146 | strategy_class, strategy_name, kwargs = strategies[choice] 147 | 148 | try: 149 | # Initialize strategy and agent 150 | memory_strategy = strategy_class(**kwargs) 151 | agent = AIAgent(memory_strategy, system_prompt="You are a helpful AI assistant.") 152 | 153 | print(f"\nNow using: {strategy_name}") 154 | print("Type 'stats' to see memory statistics, 'clear' to clear memory, 'back' to choose another strategy") 155 | 156 | while True: 157 | user_input = input("\nYou: ").strip() 158 | 159 | if user_input.lower() == 'back': 160 | break 161 | elif user_input.lower() == 'stats': 162 | stats = agent.get_memory_stats() 163 | print("\nMemory Statistics:") 164 | for key, value in stats.items(): 165 | print(f" {key}: {value}") 166 | elif user_input.lower() == 'clear': 167 | agent.clear_memory() 168 | elif user_input: 169 | result = agent.chat(user_input, verbose=False) 170 | print(f"AI: {result['ai_response']}") 171 | print(f"Response time: {result['generation_time']:.2f}s | Tokens: {result['prompt_tokens']}") 172 | 173 | except Exception as e: 174 | print(f"Error with {strategy_name}: {str(e)}") 175 | else: 176 | print("Invalid choice. Please select 1-9 or 'q'.") 177 | 178 | 179 | def quick_comparison_demo(): 180 | """ 181 | Quick comparison of key strategies on the same conversation. 182 | """ 183 | print("\n⚡ Quick Comparison Demo") 184 | print("=" * 40) 185 | 186 | # Single conversation to test all strategies 187 | test_conversation = [ 188 | "Remember this important fact: I am allergic to peanuts.", 189 | "I love traveling and have been to Japan, France, and Italy.", 190 | "My favorite hobby is photography, especially landscape photography.", 191 | "What do you know about my allergy and travel experiences?" 
192 | ] 193 | 194 | # Key strategies to compare 195 | comparison_strategies = [ 196 | (SequentialMemory, "Sequential", {}), 197 | (SlidingWindowMemory, "Sliding Window", {"window_size": 2}), 198 | (RetrievalMemory, "Retrieval (RAG)", {"k": 2}), 199 | (HierarchicalMemory, "Hierarchical", {"window_size": 2, "k": 2}) 200 | ] 201 | 202 | results = {} 203 | 204 | for strategy_class, strategy_name, kwargs in comparison_strategies: 205 | print(f"\nTesting {strategy_name}...") 206 | 207 | try: 208 | memory_strategy = strategy_class(**kwargs) 209 | agent = AIAgent(memory_strategy, system_prompt="You are a helpful assistant.") 210 | 211 | # Run all conversations 212 | for user_input in test_conversation: 213 | result = agent.chat(user_input, verbose=False) 214 | 215 | # Store final result for comparison 216 | results[strategy_name] = { 217 | "final_response": result['ai_response'], 218 | "final_tokens": result['prompt_tokens'], 219 | "memory_stats": agent.get_memory_stats() 220 | } 221 | 222 | except Exception as e: 223 | results[strategy_name] = {"error": str(e)} 224 | 225 | # Display comparison 226 | print(f"\nCOMPARISON RESULTS") 227 | print("=" * 50) 228 | 229 | for strategy_name, data in results.items(): 230 | print(f"\n{strategy_name}:") 231 | if "error" in data: 232 | print(f" Error: {data['error']}") 233 | else: 234 | print(f" Response: {data['final_response'][:100]}...") 235 | print(f" Tokens: {data['final_tokens']}") 236 | print(f" Memory: {data['memory_stats'].get('memory_size', 'Unknown')}") 237 | 238 | 239 | def main(): 240 | """ 241 | Main function with menu-driven interface. 242 | """ 243 | print("AI Agent Memory Design & Optimization - Demo Suite") 244 | print("=" * 50) 245 | 246 | # Check if OpenAI API key is available 247 | if not os.getenv("OPENAI_API_KEY"): 248 | print("Error: OPENAI_API_KEY not found in environment variables.") 249 | print("Please set your OpenAI API key in the .env file.") 250 | return 251 | 252 | while True: 253 | print("\nChoose a demo mode:") 254 | print("1. Comprehensive Demo (all strategies)") 255 | print("2. Interactive Tester (choose strategy)") 256 | print("3. Quick Comparison (key strategies)") 257 | print("4. Exit") 258 | 259 | choice = input("\nEnter your choice (1-4): ").strip() 260 | 261 | if choice == "1": 262 | run_comprehensive_demo() 263 | elif choice == "2": 264 | interactive_strategy_tester() 265 | elif choice == "3": 266 | quick_comparison_demo() 267 | elif choice == "4": 268 | print("👋 Goodbye!") 269 | break 270 | else: 271 | print("❌ Invalid choice. Please select 1-4.") 272 | 273 | 274 | if __name__ == "__main__": 275 | main() 276 | -------------------------------------------------------------------------------- /memory_strategies/compression_memory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Memory Compression and Integration Strategy 3 | 4 | This strategy compresses and integrates historical conversations through intelligent 5 | algorithms, significantly reducing storage space and processing overhead while 6 | retaining key information through multi-level compression mechanisms. 7 | """ 8 | 9 | import json 10 | from typing import List, Dict, Any, Optional 11 | from openai import OpenAI 12 | from .base_memory import BaseMemoryStrategy 13 | from .utils import generate_text, get_openai_client, count_tokens 14 | 15 | 16 | class CompressionMemory(BaseMemoryStrategy): 17 | """ 18 | Memory compression strategy with intelligent information integration. 
19 | 20 | Advantages: 21 | - Significant storage space reduction 22 | - Intelligent information merging 23 | - Dynamic importance scoring 24 | - Automatic redundancy filtering 25 | 26 | Disadvantages: 27 | - Complex compression algorithms 28 | - Potential information loss 29 | - Computational overhead for compression 30 | - Tuning required for optimal performance 31 | """ 32 | 33 | def __init__( 34 | self, 35 | compression_ratio: float = 0.5, 36 | importance_threshold: float = 0.7, 37 | client: Optional[OpenAI] = None 38 | ): 39 | """ 40 | Initialize compression memory system. 41 | 42 | Args: 43 | compression_ratio: Target compression ratio (0.5 = 50% compression) 44 | importance_threshold: Threshold for importance scoring (0-1) 45 | client: Optional OpenAI client instance 46 | """ 47 | self.compression_ratio = compression_ratio 48 | self.importance_threshold = importance_threshold 49 | self.client = client or get_openai_client() 50 | 51 | # Store conversation segments with metadata 52 | self.memory_segments: List[Dict[str, Any]] = [] 53 | 54 | # Compressed memory storage 55 | self.compressed_memory: List[Dict[str, Any]] = [] 56 | 57 | # Track compression statistics 58 | self.compression_stats = { 59 | "original_tokens": 0, 60 | "compressed_tokens": 0, 61 | "compression_count": 0 62 | } 63 | 64 | def add_message(self, user_input: str, ai_response: str) -> None: 65 | """ 66 | Add new conversation turn with importance scoring and compression triggers. 67 | 68 | Args: 69 | user_input: User's message 70 | ai_response: AI's response 71 | """ 72 | # Calculate importance score for this conversation turn 73 | importance_score = self._calculate_importance_score(user_input, ai_response) 74 | 75 | # Create memory segment with metadata 76 | segment = { 77 | "user_input": user_input, 78 | "ai_response": ai_response, 79 | "importance_score": importance_score, 80 | "timestamp": len(self.memory_segments), 81 | "token_count": count_tokens(user_input + ai_response), 82 | "compressed": False 83 | } 84 | 85 | self.memory_segments.append(segment) 86 | self.compression_stats["original_tokens"] += segment["token_count"] 87 | 88 | # Trigger compression if we have enough segments 89 | if len(self.memory_segments) >= 6: # Compress every 6 segments 90 | self._compress_memory_segments() 91 | 92 | def _calculate_importance_score(self, user_input: str, ai_response: str) -> float: 93 | """ 94 | Calculate importance score for a conversation turn using LLM. 95 | 96 | Args: 97 | user_input: User's message 98 | ai_response: AI's response 99 | 100 | Returns: 101 | Importance score between 0 and 1 102 | """ 103 | scoring_prompt = ( 104 | f"Rate the importance of this conversation turn on a scale of 0.0 to 1.0. " 105 | f"Consider factors like: factual information, user preferences, decisions, " 106 | f"emotional significance, and future relevance. " 107 | f"Respond with only a number between 0.0 and 1.0.\n\n" 108 | f"User: {user_input}\n" 109 | f"AI: {ai_response}" 110 | ) 111 | 112 | try: 113 | score_text = generate_text( 114 | "You are an importance scoring expert.", 115 | scoring_prompt, 116 | self.client 117 | ) 118 | # Extract numeric score from response 119 | score = float(score_text.strip()) 120 | return max(0.0, min(1.0, score)) # Clamp between 0 and 1 121 | except: 122 | return 0.5 # Default moderate importance 123 | 124 | def _compress_memory_segments(self) -> None: 125 | """ 126 | Compress memory segments using intelligent algorithms. 
127 | """ 128 | print("--- [Memory Compression: Compressing memory segments] ---") 129 | 130 | # Separate high and low importance segments 131 | high_importance = [s for s in self.memory_segments if s["importance_score"] >= self.importance_threshold] 132 | low_importance = [s for s in self.memory_segments if s["importance_score"] < self.importance_threshold] 133 | 134 | # Compress low importance segments 135 | if low_importance: 136 | compressed_segment = self._semantic_compression(low_importance) 137 | self.compressed_memory.append(compressed_segment) 138 | 139 | # Keep high importance segments with minimal compression 140 | for segment in high_importance: 141 | segment["compressed"] = True 142 | self.compressed_memory.append({ 143 | "type": "high_importance", 144 | "content": f"User: {segment['user_input']}\nAI: {segment['ai_response']}", 145 | "importance_score": segment["importance_score"], 146 | "timestamp": segment["timestamp"] 147 | }) 148 | 149 | # Clear processed segments 150 | self.memory_segments = [] 151 | self.compression_stats["compression_count"] += 1 152 | 153 | def _semantic_compression(self, segments: List[Dict[str, Any]]) -> Dict[str, Any]: 154 | """ 155 | Perform semantic-level compression on low importance segments. 156 | 157 | Args: 158 | segments: List of memory segments to compress 159 | 160 | Returns: 161 | Compressed segment dictionary 162 | """ 163 | # Combine all low importance conversations 164 | combined_text = "\n".join([ 165 | f"User: {s['user_input']}\nAI: {s['ai_response']}" 166 | for s in segments 167 | ]) 168 | 169 | # Use LLM to create compressed summary 170 | compression_prompt = ( 171 | f"Compress the following conversations into a concise summary that retains " 172 | f"the key information while reducing length by approximately {int(self.compression_ratio * 100)}%. " 173 | f"Focus on facts, decisions, and context that might be relevant later.\n\n" 174 | f"Conversations:\n{combined_text}\n\n" 175 | f"Compressed Summary:" 176 | ) 177 | 178 | compressed_content = generate_text( 179 | "You are a memory compression expert.", 180 | compression_prompt, 181 | self.client 182 | ) 183 | 184 | compressed_tokens = count_tokens(compressed_content) 185 | original_tokens = sum(s["token_count"] for s in segments) 186 | 187 | self.compression_stats["compressed_tokens"] += compressed_tokens 188 | 189 | return { 190 | "type": "compressed", 191 | "content": compressed_content, 192 | "original_segments": len(segments), 193 | "compression_ratio": compressed_tokens / original_tokens if original_tokens > 0 else 0, 194 | "timestamp_range": (segments[0]["timestamp"], segments[-1]["timestamp"]) 195 | } 196 | 197 | def get_context(self, query: str) -> str: 198 | """ 199 | Retrieve relevant context from both active segments and compressed memory. 200 | 201 | Args: 202 | query: Current user query 203 | 204 | Returns: 205 | Relevant context from compressed and active memory 206 | """ 207 | context_parts = [] 208 | 209 | # Add relevant compressed memory 210 | for compressed_segment in self.compressed_memory: 211 | if self._is_relevant_to_query(compressed_segment["content"], query): 212 | context_parts.append(f"[Compressed Memory]: {compressed_segment['content']}") 213 | 214 | # Add recent active segments 215 | for segment in self.memory_segments[-3:]: # Last 3 active segments 216 | context_parts.append(f"User: {segment['user_input']}\nAI: {segment['ai_response']}") 217 | 218 | if not context_parts: 219 | return "No relevant information in memory yet." 
220 | 221 | return "### Memory Context:\n" + "\n---\n".join(context_parts) 222 | 223 | def _is_relevant_to_query(self, content: str, query: str) -> bool: 224 | """ 225 | Simple relevance check based on keyword overlap. 226 | 227 | Args: 228 | content: Memory content to check 229 | query: User query 230 | 231 | Returns: 232 | True if content is relevant to query 233 | """ 234 | query_words = set(query.lower().split()) 235 | content_words = set(content.lower().split()) 236 | 237 | # Check for word overlap (simple heuristic) 238 | overlap = len(query_words.intersection(content_words)) 239 | return overlap >= 2 # At least 2 words in common 240 | 241 | def clear(self) -> None: 242 | """Reset all memory storage and statistics.""" 243 | self.memory_segments = [] 244 | self.compressed_memory = [] 245 | self.compression_stats = { 246 | "original_tokens": 0, 247 | "compressed_tokens": 0, 248 | "compression_count": 0 249 | } 250 | print("Compression memory cleared.") 251 | 252 | def get_memory_stats(self) -> Dict[str, Any]: 253 | """ 254 | Get comprehensive statistics about memory compression. 255 | 256 | Returns: 257 | Dictionary containing memory statistics 258 | """ 259 | active_segments = len(self.memory_segments) 260 | compressed_segments = len(self.compressed_memory) 261 | 262 | overall_compression_ratio = ( 263 | self.compression_stats["compressed_tokens"] / self.compression_stats["original_tokens"] 264 | if self.compression_stats["original_tokens"] > 0 else 0 265 | ) 266 | 267 | return { 268 | "strategy_type": "CompressionMemory", 269 | "compression_ratio_target": self.compression_ratio, 270 | "importance_threshold": self.importance_threshold, 271 | "active_segments": active_segments, 272 | "compressed_segments": compressed_segments, 273 | "compression_stats": self.compression_stats, 274 | "overall_compression_ratio": overall_compression_ratio, 275 | "memory_size": f"{active_segments} active + {compressed_segments} compressed", 276 | "advantages": ["Space reduction", "Intelligent merging", "Redundancy filtering"], 277 | "disadvantages": ["Complex algorithms", "Information loss", "Computational overhead"] 278 | } 279 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
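The FastAPI application that follows exposes every memory strategy over REST. A minimal Python client round-trip might look like this (a sketch, assuming the server is running locally via `uvicorn api:app` with `OPENAI_API_KEY` set, and that the `requests` package is installed; it is not part of requirements.txt):

```python
import requests

BASE = "http://localhost:8000"

# Create a session backed by sliding-window memory.
session = requests.post(f"{BASE}/sessions", json={
    "strategy_type": "sliding_window",
    "strategy_config": {"window_size": 4},
    "system_prompt": "You are a helpful AI assistant.",
}).json()
session_id = session["session_id"]

# Chat within the session; conversation memory lives server-side.
reply = requests.post(
    f"{BASE}/sessions/{session_id}/chat",
    json={"message": "Hello! My name is Alex."},
).json()
print(reply["response"], "| prompt tokens:", reply["prompt_tokens"])

# Inspect the strategy's memory statistics, then clean up.
print(requests.get(f"{BASE}/sessions/{session_id}/stats").json())
requests.delete(f"{BASE}/sessions/{session_id}")
```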
202 | -------------------------------------------------------------------------------- /api.py: -------------------------------------------------------------------------------- 1 | """ 2 | FastAPI Application for AI Agent Memory Strategies 3 | 4 | This API provides endpoints to interact with all 9 memory optimization techniques 5 | through RESTful endpoints. Each strategy can be used independently via the API. 6 | """ 7 | 8 | import os 9 | import uuid 10 | from typing import Dict, List, Optional, Any 11 | from fastapi import FastAPI, HTTPException, Depends 12 | from fastapi.middleware.cors import CORSMiddleware 13 | from pydantic import BaseModel 14 | from contextlib import asynccontextmanager 15 | 16 | from memory_strategies import ( 17 | AIAgent, 18 | SequentialMemory, 19 | SlidingWindowMemory, 20 | SummarizationMemory, 21 | RetrievalMemory, 22 | MemoryAugmentedMemory, 23 | HierarchicalMemory, 24 | GraphMemory, 25 | CompressionMemory, 26 | OSMemory, 27 | STRATEGY_INFO, 28 | get_openai_client 29 | ) 30 | 31 | 32 | # Pydantic models for API requests/responses 33 | class ChatRequest(BaseModel): 34 | message: str 35 | api_key: Optional[str] = None 36 | 37 | 38 | class ChatResponse(BaseModel): 39 | response: str 40 | user_input: str 41 | retrieval_time: float 42 | generation_time: float 43 | prompt_tokens: int 44 | session_id: str 45 | strategy_type: str 46 | 47 | 48 | class SessionCreateRequest(BaseModel): 49 | strategy_type: str 50 | strategy_config: Optional[Dict[str, Any]] = {} 51 | system_prompt: Optional[str] = "You are a helpful AI assistant." 52 | api_key: Optional[str] = None 53 | 54 | 55 | class SessionResponse(BaseModel): 56 | session_id: str 57 | strategy_type: str 58 | strategy_config: Dict[str, Any] 59 | created: bool 60 | 61 | 62 | class MemoryStatsResponse(BaseModel): 63 | session_id: str 64 | strategy_type: str 65 | memory_stats: Dict[str, Any] 66 | 67 | 68 | class StrategyInfoResponse(BaseModel): 69 | strategy_name: str 70 | complexity: str 71 | description: str 72 | best_for: str 73 | default_config: Dict[str, Any] 74 | 75 | 76 | # Global storage for active sessions 77 | active_sessions: Dict[str, AIAgent] = {} 78 | 79 | # Available strategies with their default configurations 80 | AVAILABLE_STRATEGIES = { 81 | "sequential": { 82 | "class": SequentialMemory, 83 | "default_config": {}, 84 | "description": "Stores all conversation history chronologically" 85 | }, 86 | "sliding_window": { 87 | "class": SlidingWindowMemory, 88 | "default_config": {"window_size": 4}, 89 | "description": "Maintains only the most recent N conversations" 90 | }, 91 | "summarization": { 92 | "class": SummarizationMemory, 93 | "default_config": {"summary_threshold": 4}, 94 | "description": "Compresses conversation history using LLM summarization" 95 | }, 96 | "retrieval": { 97 | "class": RetrievalMemory, 98 | "default_config": {"k": 2, "embedding_dim": 1536}, 99 | "description": "Uses vector embeddings and similarity search (RAG)" 100 | }, 101 | "memory_augmented": { 102 | "class": MemoryAugmentedMemory, 103 | "default_config": {"window_size": 2}, 104 | "description": "Combines sliding window with persistent memory tokens" 105 | }, 106 | "hierarchical": { 107 | "class": HierarchicalMemory, 108 | "default_config": {"window_size": 2, "k": 2, "embedding_dim": 1536}, 109 | "description": "Multi-layered system with working + long-term memory" 110 | }, 111 | "graph": { 112 | "class": GraphMemory, 113 | "default_config": {}, 114 | "description": "Treats conversations as nodes with relationship edges" 115 | 
}, 116 | "compression": { 117 | "class": CompressionMemory, 118 | "default_config": {"compression_ratio": 0.5, "importance_threshold": 0.7}, 119 | "description": "Intelligent compression and integration of historical data" 120 | }, 121 | "os_memory": { 122 | "class": OSMemory, 123 | "default_config": {"ram_size": 2}, 124 | "description": "Simulates RAM/disk with active/passive memory" 125 | } 126 | } 127 | 128 | 129 | @asynccontextmanager 130 | async def lifespan(app: FastAPI): 131 | """Application lifespan manager.""" 132 | # Startup 133 | print("Starting AI Agent Memory Strategies API...") 134 | yield 135 | # Shutdown 136 | print("Shutting down API...") 137 | active_sessions.clear() 138 | 139 | 140 | # Initialize FastAPI app 141 | app = FastAPI( 142 | title="AI Agent Memory Design & Optimization API", 143 | description="RESTful API for testing and using multiple AI agent memory optimization techniques", 144 | version="1.0.0", 145 | lifespan=lifespan 146 | ) 147 | 148 | # Add CORS middleware 149 | app.add_middleware( 150 | CORSMiddleware, 151 | allow_origins=["*"], 152 | allow_credentials=True, 153 | allow_methods=["*"], 154 | allow_headers=["*"], 155 | ) 156 | 157 | 158 | def get_openai_client_with_key(api_key: Optional[str] = None): 159 | """Get OpenAI client with provided API key or from environment.""" 160 | if api_key: 161 | from openai import OpenAI 162 | return OpenAI(api_key=api_key) 163 | return get_openai_client() 164 | 165 | 166 | @app.get("/") 167 | async def root(): 168 | """Root endpoint with API information.""" 169 | return { 170 | "message": "AI Agent Memory Strategies API", 171 | "version": "1.0.0", 172 | "available_strategies": list(AVAILABLE_STRATEGIES.keys()), 173 | "endpoints": { 174 | "GET /strategies": "List all available memory strategies", 175 | "POST /sessions": "Create a new chat session with a memory strategy", 176 | "POST /sessions/{session_id}/chat": "Send a message to a specific session", 177 | "GET /sessions/{session_id}/stats": "Get memory statistics for a session", 178 | "DELETE /sessions/{session_id}": "Delete a session", 179 | "GET /sessions": "List all active sessions" 180 | } 181 | } 182 | 183 | 184 | @app.get("/strategies", response_model=List[StrategyInfoResponse]) 185 | async def list_strategies(): 186 | """List all available memory strategies with their information.""" 187 | strategies = [] 188 | 189 | for strategy_key, strategy_data in AVAILABLE_STRATEGIES.items(): 190 | strategy_class = strategy_data["class"] 191 | strategy_name = strategy_class.__name__ 192 | 193 | # Get strategy info from STRATEGY_INFO 194 | info = STRATEGY_INFO.get(strategy_name, {}) 195 | 196 | strategies.append(StrategyInfoResponse( 197 | strategy_name=strategy_key, 198 | complexity=info.get("complexity", "Unknown"), 199 | description=info.get("description", strategy_data["description"]), 200 | best_for=info.get("best_for", "General use"), 201 | default_config=strategy_data["default_config"] 202 | )) 203 | 204 | return strategies 205 | 206 | 207 | @app.post("/sessions", response_model=SessionResponse) 208 | async def create_session(request: SessionCreateRequest): 209 | """Create a new chat session with specified memory strategy.""" 210 | if request.strategy_type not in AVAILABLE_STRATEGIES: 211 | raise HTTPException( 212 | status_code=400, 213 | detail=f"Invalid strategy type. 
Available: {list(AVAILABLE_STRATEGIES.keys())}" 214 | ) 215 | 216 | try: 217 | # Get strategy configuration 218 | strategy_info = AVAILABLE_STRATEGIES[request.strategy_type] 219 | strategy_class = strategy_info["class"] 220 | 221 | # Merge default config with user config 222 | config = {**strategy_info["default_config"], **request.strategy_config} 223 | 224 | # Get OpenAI client 225 | client = get_openai_client_with_key(request.api_key) 226 | 227 | # Add client to config if strategy supports it 228 | if hasattr(strategy_class, '__init__'): 229 | import inspect 230 | sig = inspect.signature(strategy_class.__init__) 231 | if 'client' in sig.parameters: 232 | config['client'] = client 233 | 234 | # Initialize memory strategy 235 | memory_strategy = strategy_class(**config) 236 | 237 | # Create AI agent 238 | agent = AIAgent( 239 | memory_strategy=memory_strategy, 240 | system_prompt=request.system_prompt, 241 | client=client 242 | ) 243 | 244 | # Generate session ID and store 245 | session_id = str(uuid.uuid4()) 246 | active_sessions[session_id] = agent 247 | 248 | return SessionResponse( 249 | session_id=session_id, 250 | strategy_type=request.strategy_type, 251 | strategy_config=config, 252 | created=True 253 | ) 254 | 255 | except Exception as e: 256 | raise HTTPException(status_code=500, detail=f"Error creating session: {str(e)}") 257 | 258 | 259 | @app.post("/sessions/{session_id}/chat", response_model=ChatResponse) 260 | async def chat_with_session(session_id: str, request: ChatRequest): 261 | """Send a message to a specific session.""" 262 | if session_id not in active_sessions: 263 | raise HTTPException(status_code=404, detail="Session not found") 264 | 265 | try: 266 | agent = active_sessions[session_id] 267 | 268 | # Process the chat message 269 | result = agent.chat(request.message, verbose=False) 270 | 271 | return ChatResponse( 272 | response=result["ai_response"], 273 | user_input=result["user_input"], 274 | retrieval_time=result["retrieval_time"], 275 | generation_time=result["generation_time"], 276 | prompt_tokens=result["prompt_tokens"], 277 | session_id=session_id, 278 | strategy_type=type(agent.memory).__name__ 279 | ) 280 | 281 | except Exception as e: 282 | raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}") 283 | 284 | 285 | @app.get("/sessions/{session_id}/stats", response_model=MemoryStatsResponse) 286 | async def get_session_stats(session_id: str): 287 | """Get memory statistics for a specific session.""" 288 | if session_id not in active_sessions: 289 | raise HTTPException(status_code=404, detail="Session not found") 290 | 291 | try: 292 | agent = active_sessions[session_id] 293 | stats = agent.get_memory_stats() 294 | 295 | return MemoryStatsResponse( 296 | session_id=session_id, 297 | strategy_type=type(agent.memory).__name__, 298 | memory_stats=stats 299 | ) 300 | 301 | except Exception as e: 302 | raise HTTPException(status_code=500, detail=f"Error getting stats: {str(e)}") 303 | 304 | 305 | @app.delete("/sessions/{session_id}") 306 | async def delete_session(session_id: str): 307 | """Delete a specific session.""" 308 | if session_id not in active_sessions: 309 | raise HTTPException(status_code=404, detail="Session not found") 310 | 311 | del active_sessions[session_id] 312 | return {"message": f"Session {session_id} deleted successfully"} 313 | 314 | 315 | @app.get("/sessions") 316 | async def list_sessions(): 317 | """List all active sessions.""" 318 | sessions = [] 319 | for session_id, agent in active_sessions.items(): 320 | 
sessions.append({ 321 | "session_id": session_id, 322 | "strategy_type": type(agent.memory).__name__, 323 | "system_prompt": agent.system_prompt[:50] + "..." if len(agent.system_prompt) > 50 else agent.system_prompt 324 | }) 325 | 326 | return {"active_sessions": len(sessions), "sessions": sessions} 327 | 328 | 329 | @app.post("/sessions/{session_id}/clear") 330 | async def clear_session_memory(session_id: str): 331 | """Clear memory for a specific session.""" 332 | if session_id not in active_sessions: 333 | raise HTTPException(status_code=404, detail="Session not found") 334 | 335 | try: 336 | agent = active_sessions[session_id] 337 | agent.clear_memory() 338 | return {"message": f"Memory cleared for session {session_id}"} 339 | 340 | except Exception as e: 341 | raise HTTPException(status_code=500, detail=f"Error clearing memory: {str(e)}") 342 | 343 | 344 | # Health check endpoint 345 | @app.get("/health") 346 | async def health_check(): 347 | """Health check endpoint.""" 348 | return { 349 | "status": "healthy", 350 | "active_sessions": len(active_sessions), 351 | "openai_configured": bool(os.getenv("OPENAI_API_KEY")) 352 | } 353 | 354 | 355 | if __name__ == "__main__": 356 | import uvicorn 357 | uvicorn.run(app, host="0.0.0.0", port=8000) 358 | -------------------------------------------------------------------------------- /streamlit_playground.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI Agent Memory Design & Optimization - Streamlit Playground 3 | """ 4 | 5 | import streamlit as st 6 | import plotly.express as px 7 | import plotly.graph_objects as go 8 | import pandas as pd 9 | import time 10 | import json 11 | from typing import Dict, Any, List 12 | import os 13 | 14 | # Import memory strategies 15 | from memory_strategies import ( 16 | AIAgent, 17 | SequentialMemory, 18 | SlidingWindowMemory, 19 | SummarizationMemory, 20 | RetrievalMemory, 21 | MemoryAugmentedMemory, 22 | HierarchicalMemory, 23 | GraphMemory, 24 | CompressionMemory, 25 | OSMemory, 26 | STRATEGY_INFO 27 | ) 28 | 29 | # Page configuration 30 | st.set_page_config( 31 | page_title="AI Agent Memory Design & Optimization", 32 | page_icon="🤖", 33 | layout="wide", 34 | initial_sidebar_state="expanded" 35 | ) 36 | 37 | # Custom CSS for modern styling 38 | st.markdown(""" 39 | 92 | """, unsafe_allow_html=True) 93 | 94 | # Initialize session state 95 | def initialize_session_state(): 96 | """Initialize Streamlit session state variables.""" 97 | if 'agents' not in st.session_state: 98 | st.session_state.agents = {} 99 | if 'chat_history' not in st.session_state: 100 | st.session_state.chat_history = {} 101 | if 'performance_metrics' not in st.session_state: 102 | st.session_state.performance_metrics = {} 103 | if 'api_key_set' not in st.session_state: 104 | st.session_state.api_key_set = False 105 | 106 | def setup_openai_client(api_key: str): 107 | """Setup OpenAI client with provided API key.""" 108 | if api_key: 109 | os.environ["OPENAI_API_KEY"] = api_key 110 | st.session_state.api_key_set = True 111 | return True 112 | return False 113 | 114 | def get_strategy_class_and_config(strategy_name: str) -> tuple: 115 | """Get strategy class and default configuration.""" 116 | strategy_mapping = { 117 | "Sequential Memory": (SequentialMemory, {}), 118 | "Sliding Window Memory": (SlidingWindowMemory, {"window_size": 4}), 119 | "Summarization Memory": (SummarizationMemory, {"summary_threshold": 4}), 120 | "Retrieval Memory (RAG)": (RetrievalMemory, {"k": 2}), 121 | 
"Memory-Augmented Memory": (MemoryAugmentedMemory, {"window_size": 2}), 122 | "Hierarchical Memory": (HierarchicalMemory, {"window_size": 2, "k": 2}), 123 | "Graph Memory": (GraphMemory, {}), 124 | "Compression Memory": (CompressionMemory, {"compression_ratio": 0.5}), 125 | "OS-like Memory": (OSMemory, {"ram_size": 2}) 126 | } 127 | return strategy_mapping.get(strategy_name, (SequentialMemory, {})) 128 | 129 | def create_agent(strategy_name: str, config: Dict[str, Any]) -> AIAgent: 130 | """Create an AI agent with specified strategy and configuration.""" 131 | strategy_class, default_config = get_strategy_class_and_config(strategy_name) 132 | 133 | # Merge configurations 134 | final_config = {**default_config, **config} 135 | 136 | # Create strategy instance 137 | memory_strategy = strategy_class(**final_config) 138 | 139 | # Create agent 140 | agent = AIAgent( 141 | memory_strategy=memory_strategy, 142 | system_prompt="You are a helpful AI assistant with advanced memory capabilities." 143 | ) 144 | 145 | return agent 146 | 147 | def render_sidebar(): 148 | """Render the sidebar with API key input and strategy information.""" 149 | st.sidebar.markdown(""" 150 | 154 | """, unsafe_allow_html=True) 155 | 156 | # API Key input 157 | api_key = st.sidebar.text_input( 158 | "OpenAI API Key", 159 | type="password", 160 | placeholder="sk-...", 161 | help="Enter your OpenAI API key. This is required for all memory strategies." 162 | ) 163 | 164 | if api_key: 165 | if setup_openai_client(api_key): 166 | st.sidebar.success("✅ API Key configured successfully!") 167 | else: 168 | st.sidebar.error("❌ Invalid API Key") 169 | elif not st.session_state.api_key_set: 170 | st.sidebar.warning("⚠️ Please enter your OpenAI API key to continue") 171 | return False 172 | 173 | st.sidebar.markdown("---") 174 | 175 | # Strategy information 176 | st.sidebar.markdown(""" 177 | 181 | """, unsafe_allow_html=True) 182 | 183 | # Strategy complexity legend 184 | st.sidebar.markdown("### Complexity Levels") 185 | st.sidebar.markdown("🟢 **Basic** - Simple implementation") 186 | st.sidebar.markdown("🟡 **Advanced** - Moderate complexity") 187 | st.sidebar.markdown("🔴 **Complex** - High complexity") 188 | 189 | # Attribution section 190 | st.sidebar.markdown("---") 191 | st.sidebar.markdown( 192 | """ 193 |
194 | <div style="text-align: center;">
195 | Built by AI Anytime ❤️<br>
196 | <a href="https://aianytime.net">aianytime.net</a><br>
197 | Creator Portfolio: <a href="https://sonukumar.site">sonukumar.site</a><br>
198 | </div>
199 | """, 200 | unsafe_allow_html=True 201 | ) 202 | 203 | return True 204 | 205 | def render_main_header(): 206 | """Render the main header.""" 207 | st.markdown(""" 208 |
209 | <div class="main-header"><h1>AI Agent Memory Design & Optimization Playground</h1>
210 | <p>Interactive testing environment for multiple memory optimization techniques</p>
211 | </div>
212 | """, unsafe_allow_html=True) 213 | 214 | def render_strategy_overview(): 215 | """Render strategy overview cards.""" 216 | st.markdown("## Available Memory Strategies") 217 | 218 | # Create columns for strategy cards 219 | col1, col2, col3 = st.columns(3) 220 | 221 | strategies = [ 222 | ("Sequential Memory", "🟢", "Stores all conversation history"), 223 | ("Sliding Window Memory", "🟢", "Recent N conversations only"), 224 | ("Summarization Memory", "🟢", "LLM-based compression"), 225 | ("Retrieval Memory (RAG)", "🟡", "Vector similarity search"), 226 | ("Memory-Augmented Memory", "🟡", "Persistent memory tokens"), 227 | ("Hierarchical Memory", "🟡", "Multi-layered memory"), 228 | ("Graph Memory", "🔴", "Relationship modeling"), 229 | ("Compression Memory", "🔴", "Intelligent compression"), 230 | ("OS-like Memory", "🔴", "RAM/disk simulation") 231 | ] 232 | 233 | for i, (name, complexity, desc) in enumerate(strategies): 234 | col = [col1, col2, col3][i % 3] 235 | with col: 236 | st.markdown(f""" 237 |
238 | <div class="strategy-card"><h4>{complexity} {name}</h4>
239 | <p>{desc}</p>
240 | </div>
241 | """, unsafe_allow_html=True) 242 | 243 | def render_single_strategy_tester(): 244 | """Render single strategy testing interface.""" 245 | st.markdown("## Single Strategy Tester") 246 | 247 | col1, col2 = st.columns([1, 2]) 248 | 249 | with col1: 250 | # Strategy selection 251 | strategy_name = st.selectbox( 252 | "Choose Memory Strategy", 253 | [ 254 | "Sequential Memory", 255 | "Sliding Window Memory", 256 | "Summarization Memory", 257 | "Retrieval Memory (RAG)", 258 | "Memory-Augmented Memory", 259 | "Hierarchical Memory", 260 | "Graph Memory", 261 | "Compression Memory", 262 | "OS-like Memory" 263 | ] 264 | ) 265 | 266 | # Strategy configuration 267 | st.markdown("### Configuration") 268 | config = {} 269 | 270 | if strategy_name == "Sliding Window Memory": 271 | config["window_size"] = st.slider("Window Size", 1, 10, 4) 272 | elif strategy_name == "Summarization Memory": 273 | config["summary_threshold"] = st.slider("Summary Threshold", 2, 10, 4) 274 | elif strategy_name == "Retrieval Memory (RAG)": 275 | config["k"] = st.slider("Retrieval Count (k)", 1, 5, 2) 276 | elif strategy_name == "Memory-Augmented Memory": 277 | config["window_size"] = st.slider("Window Size", 1, 5, 2) 278 | elif strategy_name == "Hierarchical Memory": 279 | config["window_size"] = st.slider("Working Memory Size", 1, 5, 2) 280 | config["k"] = st.slider("Long-term Retrieval (k)", 1, 5, 2) 281 | elif strategy_name == "Compression Memory": 282 | config["compression_ratio"] = st.slider("Compression Ratio", 0.1, 0.9, 0.5) 283 | elif strategy_name == "OS-like Memory": 284 | config["ram_size"] = st.slider("RAM Size", 1, 5, 2) 285 | 286 | # Initialize agent button 287 | if st.button("🚀 Initialize Agent", type="primary"): 288 | try: 289 | agent = create_agent(strategy_name, config) 290 | st.session_state.agents[strategy_name] = agent 291 | st.session_state.chat_history[strategy_name] = [] 292 | st.session_state.performance_metrics[strategy_name] = [] 293 | st.success(f"✅ {strategy_name} agent initialized!") 294 | except Exception as e: 295 | st.error(f"❌ Error initializing agent: {str(e)}") 296 | 297 | with col2: 298 | if strategy_name in st.session_state.agents: 299 | # Chat interface 300 | st.markdown("### Chat Interface") 301 | 302 | # Display chat history 303 | chat_container = st.container() 304 | with chat_container: 305 | for msg in st.session_state.chat_history[strategy_name]: 306 | if msg["role"] == "user": 307 | st.markdown(f""" 308 |
309 | <div class="user-message"><b>You:</b> {msg["content"]}
310 | </div>
311 | """, unsafe_allow_html=True) 312 | else: 313 | st.markdown(f""" 314 |
315 | <div class="ai-message"><b>AI:</b> {msg["content"]}<br>
316 | <small>⏱️ {msg.get('time', 0):.2f}s | 🔢 {msg.get('tokens', 0)} tokens</small>
317 | </div>
318 | """, unsafe_allow_html=True) 319 | 320 | # Chat input 321 | user_input = st.text_input("Your message:", key=f"input_{strategy_name}") 322 | 323 | col_send, col_clear = st.columns([1, 1]) 324 | 325 | with col_send: 326 | if st.button("Send", key=f"send_{strategy_name}"): 327 | if user_input: 328 | try: 329 | agent = st.session_state.agents[strategy_name] 330 | result = agent.chat(user_input, verbose=False) 331 | 332 | # Add to chat history 333 | st.session_state.chat_history[strategy_name].extend([ 334 | {"role": "user", "content": user_input}, 335 | { 336 | "role": "assistant", 337 | "content": result["ai_response"], 338 | "time": result["generation_time"], 339 | "tokens": result["prompt_tokens"] 340 | } 341 | ]) 342 | 343 | # Add to performance metrics 344 | st.session_state.performance_metrics[strategy_name].append({ 345 | "turn": len(st.session_state.performance_metrics[strategy_name]) + 1, 346 | "tokens": result["prompt_tokens"], 347 | "retrieval_time": result["retrieval_time"], 348 | "generation_time": result["generation_time"] 349 | }) 350 | 351 | st.rerun() 352 | 353 | except Exception as e: 354 | st.error(f"❌ Error: {str(e)}") 355 | 356 | with col_clear: 357 | if st.button("🗑️ Clear", key=f"clear_{strategy_name}"): 358 | if strategy_name in st.session_state.agents: 359 | st.session_state.agents[strategy_name].clear_memory() 360 | st.session_state.chat_history[strategy_name] = [] 361 | st.session_state.performance_metrics[strategy_name] = [] 362 | st.success("🧹 Memory cleared!") 363 | st.rerun() 364 | 365 | # Memory statistics 366 | if strategy_name in st.session_state.agents: 367 | st.markdown("### Memory Statistics") 368 | try: 369 | stats = st.session_state.agents[strategy_name].get_memory_stats() 370 | 371 | # Display key metrics 372 | metric_cols = st.columns(3) 373 | with metric_cols[0]: 374 | st.markdown(f""" 375 |
376 | <div class="metric-card"><h4>Strategy Type</h4>
377 | <p>{stats.get('strategy_type', 'Unknown')}</p>
378 | </div>
379 | """, unsafe_allow_html=True) 380 | 381 | with metric_cols[1]: 382 | st.markdown(f""" 383 |
384 | <div class="metric-card"><h4>Memory Size</h4>
385 | <p>{stats.get('memory_size', 'Unknown')}</p>
386 | </div>
387 | """, unsafe_allow_html=True) 388 | 389 | with metric_cols[2]: 390 | turns = len(st.session_state.chat_history[strategy_name]) // 2 391 | st.markdown(f""" 392 |
393 | <div class="metric-card"><h4>Conversation Turns</h4>
394 | <p>{turns}</p>
395 | </div>
396 | """, unsafe_allow_html=True) 397 | 398 | # Detailed stats 399 | with st.expander("📈 Detailed Statistics"): 400 | st.json(stats) 401 | 402 | except Exception as e: 403 | st.error(f"Error getting stats: {str(e)}") 404 | else: 405 | st.info("👈 Please initialize an agent first to start chatting!") 406 | 407 | def render_performance_dashboard(): 408 | """Render performance comparison dashboard.""" 409 | st.markdown("## Performance Dashboard") 410 | 411 | if not st.session_state.performance_metrics: 412 | st.info("No performance data yet. Test some strategies to see metrics!") 413 | return 414 | 415 | # Create performance comparison charts 416 | col1, col2 = st.columns(2) 417 | 418 | with col1: 419 | st.markdown("### Token Usage Over Time") 420 | 421 | # Prepare data for token usage chart 422 | token_data = [] 423 | for strategy, metrics in st.session_state.performance_metrics.items(): 424 | for metric in metrics: 425 | token_data.append({ 426 | "Strategy": strategy, 427 | "Turn": metric["turn"], 428 | "Tokens": metric["tokens"] 429 | }) 430 | 431 | if token_data: 432 | df_tokens = pd.DataFrame(token_data) 433 | fig_tokens = px.line( 434 | df_tokens, 435 | x="Turn", 436 | y="Tokens", 437 | color="Strategy", 438 | title="Token Usage Comparison", 439 | color_discrete_sequence=px.colors.qualitative.Set3 440 | ) 441 | fig_tokens.update_layout( 442 | plot_bgcolor='rgba(0,0,0,0)', 443 | paper_bgcolor='rgba(0,0,0,0)' 444 | ) 445 | st.plotly_chart(fig_tokens, use_container_width=True) 446 | 447 | with col2: 448 | st.markdown("### Response Time Analysis") 449 | 450 | # Prepare data for response time chart 451 | time_data = [] 452 | for strategy, metrics in st.session_state.performance_metrics.items(): 453 | for metric in metrics: 454 | time_data.append({ 455 | "Strategy": strategy, 456 | "Turn": metric["turn"], 457 | "Generation Time": metric["generation_time"], 458 | "Retrieval Time": metric["retrieval_time"] 459 | }) 460 | 461 | if time_data: 462 | df_times = pd.DataFrame(time_data) 463 | fig_times = px.bar( 464 | df_times, 465 | x="Strategy", 466 | y=["Generation Time", "Retrieval Time"], 467 | title="Average Response Times", 468 | color_discrete_sequence=["#667eea", "#764ba2"] 469 | ) 470 | fig_times.update_layout( 471 | plot_bgcolor='rgba(0,0,0,0)', 472 | paper_bgcolor='rgba(0,0,0,0)' 473 | ) 474 | st.plotly_chart(fig_times, use_container_width=True) 475 | 476 | def render_batch_tester(): 477 | """Render batch testing interface for comparing multiple strategies.""" 478 | st.markdown("## Batch Strategy Comparison") 479 | 480 | col1, col2 = st.columns([1, 2]) 481 | 482 | with col1: 483 | st.markdown("### Test Configuration") 484 | 485 | # Strategy selection 486 | strategies_to_test = st.multiselect( 487 | "Select Strategies to Compare", 488 | [ 489 | "Sequential Memory", 490 | "Sliding Window Memory", 491 | "Retrieval Memory (RAG)", 492 | "Hierarchical Memory" 493 | ], 494 | default=["Sequential Memory", "Retrieval Memory (RAG)"] 495 | ) 496 | 497 | # Test conversations 498 | st.markdown("### Test Conversations") 499 | test_conversations = st.text_area( 500 | "Enter test messages (one per line)", 501 | value="Hi! 
My name is Alex and I'm a software engineer.\nI'm working on a machine learning project.\nI prefer Python and love coffee.\nWhat do you remember about me?", 502 | height=150 503 | ).split('\n') 504 | 505 | if st.button("🚀 Run Batch Test", type="primary"): 506 | if strategies_to_test and test_conversations: 507 | run_batch_test(strategies_to_test, test_conversations) 508 | 509 | with col2: 510 | if 'batch_results' in st.session_state: 511 | st.markdown("### Batch Test Results") 512 | display_batch_results() 513 | 514 | def run_batch_test(strategies: List[str], conversations: List[str]): 515 | """Run batch test on multiple strategies.""" 516 | results = {} 517 | 518 | progress_bar = st.progress(0) 519 | status_text = st.empty() 520 | 521 | total_steps = len(strategies) * len(conversations) 522 | current_step = 0 523 | 524 | for strategy_name in strategies: 525 | status_text.text(f"Testing {strategy_name}...") 526 | 527 | try: 528 | # Create agent 529 | agent = create_agent(strategy_name, {}) 530 | 531 | strategy_results = { 532 | "responses": [], 533 | "metrics": [], 534 | "final_stats": {} 535 | } 536 | 537 | # Run conversations 538 | for i, conversation in enumerate(conversations): 539 | if conversation.strip(): 540 | result = agent.chat(conversation.strip(), verbose=False) 541 | 542 | strategy_results["responses"].append({ 543 | "turn": i + 1, 544 | "user": conversation.strip(), 545 | "ai": result["ai_response"], 546 | "tokens": result["prompt_tokens"], 547 | "time": result["generation_time"] 548 | }) 549 | 550 | strategy_results["metrics"].append({ 551 | "turn": i + 1, 552 | "tokens": result["prompt_tokens"], 553 | "generation_time": result["generation_time"], 554 | "retrieval_time": result["retrieval_time"] 555 | }) 556 | 557 | current_step += 1 558 | progress_bar.progress(current_step / total_steps) 559 | 560 | # Get final memory stats 561 | strategy_results["final_stats"] = agent.get_memory_stats() 562 | results[strategy_name] = strategy_results 563 | 564 | except Exception as e: 565 | st.error(f"Error testing {strategy_name}: {str(e)}") 566 | 567 | st.session_state.batch_results = results 568 | status_text.text("✅ Batch test completed!") 569 | progress_bar.progress(1.0) 570 | 571 | def display_batch_results(): 572 | """Display batch test results.""" 573 | results = st.session_state.batch_results 574 | 575 | # Summary metrics 576 | st.markdown("#### Summary Metrics") 577 | 578 | summary_data = [] 579 | for strategy, data in results.items(): 580 | if data["metrics"]: 581 | avg_tokens = sum(m["tokens"] for m in data["metrics"]) / len(data["metrics"]) 582 | avg_time = sum(m["generation_time"] for m in data["metrics"]) / len(data["metrics"]) 583 | 584 | summary_data.append({ 585 | "Strategy": strategy, 586 | "Avg Tokens": f"{avg_tokens:.0f}", 587 | "Avg Response Time": f"{avg_time:.2f}s", 588 | "Memory Size": data["final_stats"].get("memory_size", "Unknown") 589 | }) 590 | 591 | if summary_data: 592 | df_summary = pd.DataFrame(summary_data) 593 | st.dataframe(df_summary, use_container_width=True) 594 | 595 | # Detailed results 596 | for strategy, data in results.items(): 597 | with st.expander(f"{strategy} - Detailed Results"): 598 | st.markdown("**Final Response:**") 599 | if data["responses"]: 600 | final_response = data["responses"][-1] 601 | st.markdown(f"*User:* {final_response['user']}") 602 | st.markdown(f"*AI:* {final_response['ai']}") 603 | 604 | st.markdown("**Memory Statistics:**") 605 | st.json(data["final_stats"]) 606 | 607 | def main(): 608 | """Main application function.""" 
609 | initialize_session_state() 610 | 611 | # Render sidebar 612 | if not render_sidebar(): 613 | st.warning("⚠️ Please configure your OpenAI API key in the sidebar to continue.") 614 | return 615 | 616 | # Render main content 617 | render_main_header() 618 | 619 | # Navigation tabs 620 | tab1, tab2, tab3, tab4 = st.tabs([ 621 | "Overview", 622 | "Single Tester", 623 | "Batch Comparison", 624 | "Performance Dashboard" 625 | ]) 626 | 627 | with tab1: 628 | render_strategy_overview() 629 | 630 | st.markdown("## Getting Started") 631 | st.markdown(""" 632 | 1. **Configure API Key**: Enter your OpenAI API key in the sidebar 633 | 2. **Choose Strategy**: Select a memory strategy to test 634 | 3. **Configure Settings**: Adjust strategy parameters as needed 635 | 4. **Start Chatting**: Initialize an agent and begin testing 636 | 5. **Compare Performance**: Use batch testing to compare strategies 637 | """) 638 | 639 | st.markdown("## Strategy Guide") 640 | 641 | guide_col1, guide_col2 = st.columns(2) 642 | 643 | with guide_col1: 644 | st.markdown(""" 645 | **🟢 Basic Strategies (Easy to implement)** 646 | - **Sequential**: Perfect recall, but expensive for long chats 647 | - **Sliding Window**: Fixed memory, loses old information 648 | - **Summarization**: Compresses history, may lose details 649 | """) 650 | 651 | with guide_col2: 652 | st.markdown(""" 653 | **🟡🔴 Advanced Strategies (Production-ready)** 654 | - **Retrieval (RAG)**: Industry standard, semantic search 655 | - **Hierarchical**: Human-like memory patterns 656 | - **Graph**: Complex relationship modeling 657 | """) 658 | 659 | with tab2: 660 | render_single_strategy_tester() 661 | 662 | with tab3: 663 | render_batch_tester() 664 | 665 | with tab4: 666 | render_performance_dashboard() 667 | 668 | if __name__ == "__main__": 669 | main() 670 | --------------------------------------------------------------------------------
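Appendix — usage sketches (editor's additions; illustrations only, not files from the repository).

The first sketch shows direct, programmatic use of the memory_strategies package, mirroring the create_agent() helper in streamlit_playground.py. It assumes OPENAI_API_KEY is set in the environment; the file name is hypothetical.

# example_direct_usage.py (hypothetical name)
from memory_strategies import AIAgent, SlidingWindowMemory

# Keep only the 4 most recent exchanges, matching the API's default config.
agent = AIAgent(
    memory_strategy=SlidingWindowMemory(window_size=4),
    system_prompt="You are a helpful AI assistant.",
)

result = agent.chat("Hi! My name is Alex.", verbose=False)
print(result["ai_response"])       # the model's reply
print(result["prompt_tokens"])     # tokens sent in the prompt
print(agent.get_memory_stats())    # strategy-specific statistics
agent.clear_memory()               # reset the conversation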
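The second sketch drives the FastAPI service in api.py over HTTP. It assumes the server is running locally on port 8000 (matching the __main__ block in api.py) and that the requests package is available — note that requests is not listed in requirements.txt, so install it separately or swap in any HTTP client.

# example_api_client.py (hypothetical name)
import requests

BASE = "http://localhost:8000"

# Create a session backed by the retrieval (RAG) strategy.
session = requests.post(f"{BASE}/sessions", json={
    "strategy_type": "retrieval",
    "strategy_config": {"k": 2},
    "system_prompt": "You are a helpful AI assistant.",
}).json()
session_id = session["session_id"]

# Chat within the session; the response carries timing and token metrics.
reply = requests.post(
    f"{BASE}/sessions/{session_id}/chat",
    json={"message": "Hi! My name is Alex."},
).json()
print(reply["response"], reply["prompt_tokens"])

# Inspect memory statistics for the session, then clean up.
print(requests.get(f"{BASE}/sessions/{session_id}/stats").json())
requests.delete(f"{BASE}/sessions/{session_id}")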